/* memset with SSE2 and REP string.
   Copyright (C) 2010-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#if IS_IN (libc)

#include <sysdep.h>
#include "asm-syntax.h"

/* Syntax: AT&T (GAS), 32-bit x86, preprocessed with cpp.
   Arguments are read from the stack relative to ESP (see DEST/CHR/LEN).

   Register roles inside __memset_sse2_rep:
     EAX   fill byte replicated into all four bytes (zero for bzero);
	   briefly used as scratch in L(not_aligned_16) and reloaded
	   from XMM0 afterwards.
     ECX   number of bytes still to be written.
     EDX   current destination; every jump-table dispatch adds ECX to
	   it, so the table targets see EDX == one past the last byte and
	   store at negative offsets.
     XMM0  fill byte replicated into all sixteen bytes.
     EBX   scratch / PIC register (saved by ENTRANCE when PIC).
     EDI   saved and used only in the >= 128 byte paths.  */

/* Unwind annotations matching a 4-byte push/pop of REG.  */
#define CFI_PUSH(REG) \
	cfi_adjust_cfa_offset (4); \
	cfi_rel_offset (REG, 0)

#define CFI_POP(REG) \
	cfi_adjust_cfa_offset (-4); \
	cfi_restore (REG)

/* Push/pop REG together with the matching unwind annotation.  */
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
#define POP(REG) popl REG; CFI_POP (REG)

/* Stack offsets of the arguments.  The bzero variant has no fill
   character argument and sets no return value; the memset variant
   returns the original destination pointer in EAX.  */
#ifdef USE_AS_BZERO
# define DEST PARMS
# define LEN DEST+4
# define SETRTNVAL
#else
# define DEST PARMS
# define CHR DEST+4
# define LEN CHR+4
# define SETRTNVAL movl DEST(%esp), %eax
#endif

#ifdef PIC
/* EBX is used to address the jump tables, so it is saved on entry and
   restored at every exit.  RETURN is followed by more code that still
   runs with EBX pushed, hence the CFI_PUSH after the ret; RETURN_END is
   used for the very last exit only.  */
# define ENTRANCE PUSH (%ebx);
# define RETURN_END POP (%ebx); ret
# define RETURN RETURN_END; CFI_PUSH (%ebx)
# define PARMS 8 /* Preserve EBX. */
# define JMPTBL(I, B) I - B

/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
   jump table with relative offsets.  The table is indexed by ECX, and
   ECX is also added to EDX before the jump.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE) \
	/* We first load PC into EBX.  */ \
	SETUP_PIC_REG(bx); \
	/* Get the address of the jump table.  */ \
	add $(TABLE - .), %ebx; \
	/* Get the entry and convert the relative offset to the \
	   absolute address.  */ \
	add (%ebx,%ecx,4), %ebx; \
	add %ecx, %edx; \
	/* We loaded the jump table and adjusted EDX. Go.  */ \
	_CET_NOTRACK jmp *%ebx
#else
# define ENTRANCE
# define RETURN_END ret
# define RETURN RETURN_END
# define PARMS 4
# define JMPTBL(I, B) I

/* Branch to an entry in a jump table.  TABLE is a jump table with
   absolute offsets.  The table is indexed by ECX, and ECX is also
   added to EDX before the jump.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE) \
	add %ecx, %edx; \
	_CET_NOTRACK jmp *TABLE(,%ecx,4)
#endif

	.section .text.sse2,"ax",@progbits
#if defined SHARED && IS_IN (libc) && !defined USE_AS_BZERO
/* Checked entry point.  Compares the stack argument at 16(%esp)
   against the one at 12(%esp) (unsigned) and jumps to __chk_fail when
   the former is smaller.  There is no ret here: otherwise execution
   continues into __memset_sse2_rep, which follows immediately.
   NOTE(review): presumably 12(%esp) is the length and 16(%esp) the
   destination object size, per the usual __memset_chk prototype --
   confirm against the caller.  */
ENTRY (__memset_chk_sse2_rep)
	movl	12(%esp), %eax
	cmpl	%eax, 16(%esp)
	jb	HIDDEN_JUMPTARGET (__chk_fail)
END (__memset_chk_sse2_rep)
#endif
ENTRY (__memset_sse2_rep)
	ENTRANCE

	movl	LEN(%esp), %ecx
#ifdef USE_AS_BZERO
	xor	%eax, %eax
#else
	movzbl	CHR(%esp), %eax
	movb	%al, %ah
	/* Fill the whole EAX with pattern.  */
	movl	%eax, %edx
	shl	$16, %eax
	or	%edx, %eax
#endif
	movl	DEST(%esp), %edx
	cmp	$32, %ecx
	jae	L(32bytesormore)

/* ECX < 32: dispatch on the exact length.  */
L(write_less32bytes):
	BRANCH_TO_JMPTBL_ENTRY (L(table_less_32bytes))


	.pushsection .rodata.sse2,"a",@progbits
	ALIGN (2)
L(table_less_32bytes):
	.int	JMPTBL (L(write_0bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_1bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_2bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_3bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_4bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_5bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_6bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_7bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_8bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_9bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_10bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_11bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_12bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_13bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_14bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_15bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_16bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_17bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_18bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_19bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_20bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_21bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_22bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_23bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_24bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_25bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_26bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_27bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_28bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_29bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_30bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_31bytes), L(table_less_32bytes))
	.popsection

/* Targets of table_less_32bytes.  EDX points one past the last byte to
   write.  Entry N falls through 4-byte stores down to the final
   N mod 4 bytes, which are written with a word and/or byte store.  */
	ALIGN (4)
L(write_28bytes):
	movl	%eax, -28(%edx)
L(write_24bytes):
	movl	%eax, -24(%edx)
L(write_20bytes):
	movl	%eax, -20(%edx)
L(write_16bytes):
	movl	%eax, -16(%edx)
L(write_12bytes):
	movl	%eax, -12(%edx)
L(write_8bytes):
	movl	%eax, -8(%edx)
L(write_4bytes):
	movl	%eax, -4(%edx)
L(write_0bytes):
	SETRTNVAL
	RETURN

	ALIGN (4)
L(write_29bytes):
	movl	%eax, -29(%edx)
L(write_25bytes):
	movl	%eax, -25(%edx)
L(write_21bytes):
	movl	%eax, -21(%edx)
L(write_17bytes):
	movl	%eax, -17(%edx)
L(write_13bytes):
	movl	%eax, -13(%edx)
L(write_9bytes):
	movl	%eax, -9(%edx)
L(write_5bytes):
	movl	%eax, -5(%edx)
L(write_1bytes):
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(write_30bytes):
	movl	%eax, -30(%edx)
L(write_26bytes):
	movl	%eax, -26(%edx)
L(write_22bytes):
	movl	%eax, -22(%edx)
L(write_18bytes):
	movl	%eax, -18(%edx)
L(write_14bytes):
	movl	%eax, -14(%edx)
L(write_10bytes):
	movl	%eax, -10(%edx)
L(write_6bytes):
	movl	%eax, -6(%edx)
L(write_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(write_31bytes):
	movl	%eax, -31(%edx)
L(write_27bytes):
	movl	%eax, -27(%edx)
L(write_23bytes):
	movl	%eax, -23(%edx)
L(write_19bytes):
	movl	%eax, -19(%edx)
L(write_15bytes):
	movl	%eax, -15(%edx)
L(write_11bytes):
	movl	%eax, -11(%edx)
L(write_7bytes):
	movl	%eax, -7(%edx)
L(write_3bytes):
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
/* ECX >= 32.  The alignment of EDX is not known yet; it is tested
   below.  */
L(32bytesormore):
	/* Fill xmm0 with the pattern.  */
#ifdef USE_AS_BZERO
	pxor	%xmm0, %xmm0
#else
	movd	%eax, %xmm0
	pshufd	$0, %xmm0, %xmm0
#endif
	testl	$0xf, %edx
	jz	L(aligned_16)
/* ECX >= 32 and EDX is not 16 byte aligned.  Write the first 16 bytes
   with an unaligned store, round EDX up to the next 16-byte boundary
   and shrink ECX by the number of bytes skipped.  The bytes between
   the new EDX and the end of the unaligned store are simply written
   twice.  */
L(not_aligned_16):
	movdqu	%xmm0, (%edx)
	movl	%edx, %eax
	and	$-16, %edx
	add	$16, %edx
	/* EAX = old EDX - new EDX, i.e. minus the bytes skipped.  */
	sub	%edx, %eax
	add	%eax, %ecx
	/* EAX was used as scratch; reload the 4-byte pattern.  */
	movd	%xmm0, %eax

	ALIGN (4)
/* EDX is 16 byte aligned from here on.  */
L(aligned_16):
	cmp	$128, %ecx
	jae	L(128bytesormore)

L(aligned_16_less128bytes):
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

	ALIGN (4)
/* ECX >= 128 and EDX is 16 byte aligned.  Compute in EDI the threshold
   (data cache size minus 1/16 of it) that selects between the unrolled
   SSE2 loop and REP STOS.  */
L(128bytesormore):
	PUSH (%edi)
#ifdef DATA_CACHE_SIZE
	PUSH (%ebx)
	mov	$DATA_CACHE_SIZE, %ebx
#else
# ifdef PIC
	/* EBX was already saved by ENTRANCE, so no extra push is needed.  */
	SETUP_PIC_REG(bx)
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	mov	__x86_data_cache_size@GOTOFF(%ebx), %ebx
# else
	PUSH (%ebx)
	mov	__x86_data_cache_size, %ebx
# endif
#endif
	mov	%ebx, %edi
	shr	$4, %ebx
	sub	%ebx, %edi
#if defined DATA_CACHE_SIZE || !defined PIC
	POP (%ebx)
#endif
/*
 * When the data size approaches the L1 data cache size, the
 * fast-string (REP STOS) path prefetches and combines stores
 * efficiently, so use it instead of the SSE2 loop.
 */
	cmp	%edi, %ecx
	jae	L(128bytesormore_endof_L1)
	/* Bias ECX by -128 so that each "sub $128" below sets the carry
	   flag exactly when fewer than 128 bytes will remain after the
	   128-byte chunk that is about to be written.  The MOVDQA and LEA
	   instructions in between leave the flags untouched.  */
	subl	$128, %ecx
L(128bytesormore_normal):
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	lea	128(%edx), %edx
	jb	L(128bytesless_normal)


	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	lea	128(%edx), %edx
	jae	L(128bytesormore_normal)

/* Fewer than 128 bytes are left.  Undo the bias so that ECX is the
   exact remainder (0..127) and finish through the jump table.  */
L(128bytesless_normal):
	POP (%edi)
	add	$128, %ecx
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

	/* The code below is reached with EDI still pushed.  */
	CFI_PUSH (%edi)
	ALIGN (4)
/* Large fill: EDI = destination, ECX = number of 4-byte stores,
   EDX = number of trailing bytes (0..3).  EAX holds the 4-byte
   pattern.  Relies on the direction flag being clear, which the ABI
   is expected to guarantee at function entry.  */
L(128bytesormore_endof_L1):
	mov	%edx, %edi
	mov	%ecx, %edx
	shr	$2, %ecx
	and	$3, %edx
	rep	stosl
	/* REP STOS does not modify flags, so ZF here is still the result
	   of the "and $3" above: no trailing bytes.  */
	jz	L(copy_page_by_rep_exit)
	cmp	$2, %edx
	jb	L(copy_page_by_rep_left_1)
	movw	%ax, (%edi)
	add	$2, %edi
	sub	$2, %edx
	jz	L(copy_page_by_rep_exit)
L(copy_page_by_rep_left_1):
	movb	%al, (%edi)
L(copy_page_by_rep_exit):
	POP (%edi)
	SETRTNVAL
	RETURN

	.pushsection .rodata.sse2,"a",@progbits
	ALIGN (2)
L(table_16_128bytes):
	.int	JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_1bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_2bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_3bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_5bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_6bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_7bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_9bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_10bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_11bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_13bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_14bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_15bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_17bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_18bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_19bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_21bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_22bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_23bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_25bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_26bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_27bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_29bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_30bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_31bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_33bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_34bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_35bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_37bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_38bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_39bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_41bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_42bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_43bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_44bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_45bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_46bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_47bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_49bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_50bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_51bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_53bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_54bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_55bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_57bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_58bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_59bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_61bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_62bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_63bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_65bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_66bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_67bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_69bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_70bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_71bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_73bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_74bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_75bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_77bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_78bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_79bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_81bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_82bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_83bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_85bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_86bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_87bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_89bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_90bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_91bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_93bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_94bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_95bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_97bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_98bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_99bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_101bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_102bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_103bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_105bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_106bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_107bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_109bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_110bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_111bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_113bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_114bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_115bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_117bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_118bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_119bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_121bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_122bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_123bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_125bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_126bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_127bytes), L(table_16_128bytes))
	.popsection

/* Targets of table_16_128bytes.  EDX was 16 byte aligned before the
   dispatch added ECX to it, so for entry N the address EDX - N is
   16 byte aligned.  Each chain steps by 16 from there, which keeps all
   of its MOVDQA stores aligned.  The last N mod 16 bytes are written
   with MOVQ/MOVL/MOVW/MOVB stores.  */
	ALIGN (4)
L(aligned_16_112bytes):
	movdqa	%xmm0, -112(%edx)
L(aligned_16_96bytes):
	movdqa	%xmm0, -96(%edx)
L(aligned_16_80bytes):
	movdqa	%xmm0, -80(%edx)
L(aligned_16_64bytes):
	movdqa	%xmm0, -64(%edx)
L(aligned_16_48bytes):
	movdqa	%xmm0, -48(%edx)
L(aligned_16_32bytes):
	movdqa	%xmm0, -32(%edx)
L(aligned_16_16bytes):
	movdqa	%xmm0, -16(%edx)
L(aligned_16_0bytes):
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_113bytes):
	movdqa	%xmm0, -113(%edx)
L(aligned_16_97bytes):
	movdqa	%xmm0, -97(%edx)
L(aligned_16_81bytes):
	movdqa	%xmm0, -81(%edx)
L(aligned_16_65bytes):
	movdqa	%xmm0, -65(%edx)
L(aligned_16_49bytes):
	movdqa	%xmm0, -49(%edx)
L(aligned_16_33bytes):
	movdqa	%xmm0, -33(%edx)
L(aligned_16_17bytes):
	movdqa	%xmm0, -17(%edx)
L(aligned_16_1bytes):
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_114bytes):
	movdqa	%xmm0, -114(%edx)
L(aligned_16_98bytes):
	movdqa	%xmm0, -98(%edx)
L(aligned_16_82bytes):
	movdqa	%xmm0, -82(%edx)
L(aligned_16_66bytes):
	movdqa	%xmm0, -66(%edx)
L(aligned_16_50bytes):
	movdqa	%xmm0, -50(%edx)
L(aligned_16_34bytes):
	movdqa	%xmm0, -34(%edx)
L(aligned_16_18bytes):
	movdqa	%xmm0, -18(%edx)
L(aligned_16_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_115bytes):
	movdqa	%xmm0, -115(%edx)
L(aligned_16_99bytes):
	movdqa	%xmm0, -99(%edx)
L(aligned_16_83bytes):
	movdqa	%xmm0, -83(%edx)
L(aligned_16_67bytes):
	movdqa	%xmm0, -67(%edx)
L(aligned_16_51bytes):
	movdqa	%xmm0, -51(%edx)
L(aligned_16_35bytes):
	movdqa	%xmm0, -35(%edx)
L(aligned_16_19bytes):
	movdqa	%xmm0, -19(%edx)
L(aligned_16_3bytes):
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_116bytes):
	movdqa	%xmm0, -116(%edx)
L(aligned_16_100bytes):
	movdqa	%xmm0, -100(%edx)
L(aligned_16_84bytes):
	movdqa	%xmm0, -84(%edx)
L(aligned_16_68bytes):
	movdqa	%xmm0, -68(%edx)
L(aligned_16_52bytes):
	movdqa	%xmm0, -52(%edx)
L(aligned_16_36bytes):
	movdqa	%xmm0, -36(%edx)
L(aligned_16_20bytes):
	movdqa	%xmm0, -20(%edx)
L(aligned_16_4bytes):
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_117bytes):
	movdqa	%xmm0, -117(%edx)
L(aligned_16_101bytes):
	movdqa	%xmm0, -101(%edx)
L(aligned_16_85bytes):
	movdqa	%xmm0, -85(%edx)
L(aligned_16_69bytes):
	movdqa	%xmm0, -69(%edx)
L(aligned_16_53bytes):
	movdqa	%xmm0, -53(%edx)
L(aligned_16_37bytes):
	movdqa	%xmm0, -37(%edx)
L(aligned_16_21bytes):
	movdqa	%xmm0, -21(%edx)
L(aligned_16_5bytes):
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_118bytes):
	movdqa	%xmm0, -118(%edx)
L(aligned_16_102bytes):
	movdqa	%xmm0, -102(%edx)
L(aligned_16_86bytes):
	movdqa	%xmm0, -86(%edx)
L(aligned_16_70bytes):
	movdqa	%xmm0, -70(%edx)
L(aligned_16_54bytes):
	movdqa	%xmm0, -54(%edx)
L(aligned_16_38bytes):
	movdqa	%xmm0, -38(%edx)
L(aligned_16_22bytes):
	movdqa	%xmm0, -22(%edx)
L(aligned_16_6bytes):
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_119bytes):
	movdqa	%xmm0, -119(%edx)
L(aligned_16_103bytes):
	movdqa	%xmm0, -103(%edx)
L(aligned_16_87bytes):
	movdqa	%xmm0, -87(%edx)
L(aligned_16_71bytes):
	movdqa	%xmm0, -71(%edx)
L(aligned_16_55bytes):
	movdqa	%xmm0, -55(%edx)
L(aligned_16_39bytes):
	movdqa	%xmm0, -39(%edx)
L(aligned_16_23bytes):
	movdqa	%xmm0, -23(%edx)
L(aligned_16_7bytes):
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_120bytes):
	movdqa	%xmm0, -120(%edx)
L(aligned_16_104bytes):
	movdqa	%xmm0, -104(%edx)
L(aligned_16_88bytes):
	movdqa	%xmm0, -88(%edx)
L(aligned_16_72bytes):
	movdqa	%xmm0, -72(%edx)
L(aligned_16_56bytes):
	movdqa	%xmm0, -56(%edx)
L(aligned_16_40bytes):
	movdqa	%xmm0, -40(%edx)
L(aligned_16_24bytes):
	movdqa	%xmm0, -24(%edx)
L(aligned_16_8bytes):
	movq	%xmm0, -8(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_121bytes):
	movdqa	%xmm0, -121(%edx)
L(aligned_16_105bytes):
	movdqa	%xmm0, -105(%edx)
L(aligned_16_89bytes):
	movdqa	%xmm0, -89(%edx)
L(aligned_16_73bytes):
	movdqa	%xmm0, -73(%edx)
L(aligned_16_57bytes):
	movdqa	%xmm0, -57(%edx)
L(aligned_16_41bytes):
	movdqa	%xmm0, -41(%edx)
L(aligned_16_25bytes):
	movdqa	%xmm0, -25(%edx)
L(aligned_16_9bytes):
	movq	%xmm0, -9(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_122bytes):
	movdqa	%xmm0, -122(%edx)
L(aligned_16_106bytes):
	movdqa	%xmm0, -106(%edx)
L(aligned_16_90bytes):
	movdqa	%xmm0, -90(%edx)
L(aligned_16_74bytes):
	movdqa	%xmm0, -74(%edx)
L(aligned_16_58bytes):
	movdqa	%xmm0, -58(%edx)
L(aligned_16_42bytes):
	movdqa	%xmm0, -42(%edx)
L(aligned_16_26bytes):
	movdqa	%xmm0, -26(%edx)
L(aligned_16_10bytes):
	movq	%xmm0, -10(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_123bytes):
	movdqa	%xmm0, -123(%edx)
L(aligned_16_107bytes):
	movdqa	%xmm0, -107(%edx)
L(aligned_16_91bytes):
	movdqa	%xmm0, -91(%edx)
L(aligned_16_75bytes):
	movdqa	%xmm0, -75(%edx)
L(aligned_16_59bytes):
	movdqa	%xmm0, -59(%edx)
L(aligned_16_43bytes):
	movdqa	%xmm0, -43(%edx)
L(aligned_16_27bytes):
	movdqa	%xmm0, -27(%edx)
L(aligned_16_11bytes):
	movq	%xmm0, -11(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_124bytes):
	movdqa	%xmm0, -124(%edx)
L(aligned_16_108bytes):
	movdqa	%xmm0, -108(%edx)
L(aligned_16_92bytes):
	movdqa	%xmm0, -92(%edx)
L(aligned_16_76bytes):
	movdqa	%xmm0, -76(%edx)
L(aligned_16_60bytes):
	movdqa	%xmm0, -60(%edx)
L(aligned_16_44bytes):
	movdqa	%xmm0, -44(%edx)
L(aligned_16_28bytes):
	movdqa	%xmm0, -28(%edx)
L(aligned_16_12bytes):
	movq	%xmm0, -12(%edx)
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_125bytes):
	movdqa	%xmm0, -125(%edx)
L(aligned_16_109bytes):
	movdqa	%xmm0, -109(%edx)
L(aligned_16_93bytes):
	movdqa	%xmm0, -93(%edx)
L(aligned_16_77bytes):
	movdqa	%xmm0, -77(%edx)
L(aligned_16_61bytes):
	movdqa	%xmm0, -61(%edx)
L(aligned_16_45bytes):
	movdqa	%xmm0, -45(%edx)
L(aligned_16_29bytes):
	movdqa	%xmm0, -29(%edx)
L(aligned_16_13bytes):
	movq	%xmm0, -13(%edx)
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_126bytes):
	movdqa	%xmm0, -126(%edx)
L(aligned_16_110bytes):
	movdqa	%xmm0, -110(%edx)
L(aligned_16_94bytes):
	movdqa	%xmm0, -94(%edx)
L(aligned_16_78bytes):
	movdqa	%xmm0, -78(%edx)
L(aligned_16_62bytes):
	movdqa	%xmm0, -62(%edx)
L(aligned_16_46bytes):
	movdqa	%xmm0, -46(%edx)
L(aligned_16_30bytes):
	movdqa	%xmm0, -30(%edx)
L(aligned_16_14bytes):
	movq	%xmm0, -14(%edx)
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_127bytes):
	movdqa	%xmm0, -127(%edx)
L(aligned_16_111bytes):
	movdqa	%xmm0, -111(%edx)
L(aligned_16_95bytes):
	movdqa	%xmm0, -95(%edx)
L(aligned_16_79bytes):
	movdqa	%xmm0, -79(%edx)
L(aligned_16_63bytes):
	movdqa	%xmm0, -63(%edx)
L(aligned_16_47bytes):
	movdqa	%xmm0, -47(%edx)
L(aligned_16_31bytes):
	movdqa	%xmm0, -31(%edx)
L(aligned_16_15bytes):
	movq	%xmm0, -15(%edx)
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	/* Last exit of the function: no code follows, so RETURN_END is
	   used instead of RETURN.  */
	RETURN_END

END (__memset_sse2_rep)

#endif