/* strcpy with SSE2 and unaligned load
   Copyright (C) 2011-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#if IS_IN (libc)

# ifndef USE_AS_STRCAT
#  include <sysdep.h>

#  ifndef STRCPY
#   define STRCPY  __strcpy_sse2_unaligned
#  endif

# endif

/* Jump tables hold 32-bit offsets relative to the table base (I - B).
   BRANCH_TO_JMPTBL_ENTRY loads the table address RIP-relative into
   %r11, sign-extends entry INDEX (scaled by SCALE) into %rcx, adds the
   base back and jumps there.  It overwrites %r11 and %rcx.  */
# define JMPTBL(I, B)	I - B
# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)             \
	lea	TABLE(%rip), %r11;                              \
	movslq	(%r11, INDEX, SCALE), %rcx;                     \
	lea	(%r11, %rcx), %rcx;                             \
	_CET_NOTRACK jmp *%rcx

/* Register roles visible in this part of the file:
     %rdi  destination cursor
     %rsi  source cursor
     %rax  return value (set to the original %rdi here unless
	   USE_AS_STPCPY, where the exit stubs compute it)
     %r8   remaining length budget (USE_AS_STRNCPY builds only)
     %rdx  bit mask of NUL positions from pcmpeqb/pmovmskb
     %rcx  source misalignment, later a running byte offset
   When USE_AS_STRCAT is defined the entry sequence below is omitted;
   presumably the including file provides it -- confirm there.  */
# ifndef USE_AS_STRCAT

.text
ENTRY (STRCPY)
#  ifdef USE_AS_STRNCPY
	/* RDX_LP / R8_LP come from sysdep.h (presumably pointer-width
	   register names).  A zero length leaves through L(ExitZero).  */
	mov	%RDX_LP, %R8_LP
	test	%R8_LP, %R8_LP
	jz	L(ExitZero)
#  endif
	mov	%rsi, %rcx
#  ifndef USE_AS_STPCPY
	mov	%rdi, %rax		/* result = original destination */
#  endif

# endif

	/* %rcx = source offset inside its 64-byte block.  With an offset
	   of at most 32, the two unaligned 16-byte loads at (%rsi) and
	   16(%rsi) stay inside that block, so that path is taken.  */
	and	$63, %rcx
	cmp	$32, %rcx
	jbe	L(SourceStringAlignmentLess32)

	/* Otherwise round the source down to 16 bytes and keep the
	   misalignment (0..15) in %rcx.  */
	and	$-16, %rsi
	and	$15, %rcx
	pxor	%xmm0, %xmm0
	pxor	%xmm1, %xmm1

	/* NUL mask for the first aligned 16 bytes; the shift drops the
	   bits belonging to bytes that precede the string start.  */
	pcmpeqb	(%rsi), %xmm1
	pmovmskb %xmm1, %rdx
	shr	%cl, %rdx

# ifdef USE_AS_STRNCPY
	/* %r10 = 16 - misalignment (17 - misalignment for plain strncpy);
	   a length budget not above it is handled by the Case2/Case3
	   stubs.  */
#  if defined USE_AS_STPCPY || defined USE_AS_STRCAT
	mov	$16, %r10
	sub	%rcx, %r10
	cmp	%r10, %r8
#  else
	mov	$17, %r10
	sub	%rcx, %r10
	cmp	%r10, %r8
#  endif
	jbe	L(CopyFrom1To16BytesTailCase2OrCase3)
# endif
	test	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesTail)

	/* Same check for the second aligned 16-byte chunk.  */
	pcmpeqb	16(%rsi), %xmm0
	pmovmskb %xmm0, %rdx

# ifdef USE_AS_STRNCPY
	add	$16, %r10
	cmp	%r10, %r8
	jbe	L(CopyFrom1To32BytesCase2OrCase3)
# endif
	test	%rdx, %rdx
	jnz	L(CopyFrom1To32Bytes)

	/* No NUL so far: copy the first 16 bytes of the string as is.  */
	movdqu	(%rsi, %rcx), %xmm1
	movdqu	%xmm1, (%rdi)

/* Aligned loads from the source, unaligned stores to the destination.
   Entered with %rsi 16-byte aligned and %rcx = source misalignment.
   %xmm0 is all-zero whenever execution falls through a "test/jnz"
   below, because the compare that just wrote it found no NUL.  */
	.p2align 4
L(Unalign16Both):
	/* Rebias %rdi so that (%rdi, %rcx) and (%rsi, %rcx) address
	   corresponding bytes for the running offset %rcx.  */
	sub	%rcx, %rdi
# ifdef USE_AS_STRNCPY
	/* %r8 += %rcx, saturating to all-ones if the add carries.  */
	add	%rcx, %r8
	sbb	%rcx, %rcx
	or	%rcx, %r8
# endif
	mov	$16, %rcx
	movdqa	(%rsi, %rcx), %xmm1
	movaps	16(%rsi, %rcx), %xmm2
	movdqu	%xmm1, (%rdi, %rcx)
	pcmpeqb	%xmm2, %xmm0
	pmovmskb %xmm0, %rdx
	add	$16, %rcx
# ifdef USE_AS_STRNCPY
	sub	$48, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
# endif
	test	%rdx, %rdx
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	jnz	L(CopyFrom1To16BytesUnalignedXmm2)
# else
	jnz	L(CopyFrom1To16Bytes)
# endif

	/* Each step below loads the next aligned chunk, stores the
	   previous one, and tests the new chunk for a NUL.  */
	movaps	16(%rsi, %rcx), %xmm3
	movdqu	%xmm2, (%rdi, %rcx)
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %rdx
	add	$16, %rcx
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
# endif
	test	%rdx, %rdx
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	jnz	L(CopyFrom1To16BytesUnalignedXmm3)
# else
	jnz	L(CopyFrom1To16Bytes)
# endif

	movaps	16(%rsi, %rcx), %xmm4
	movdqu	%xmm3, (%rdi, %rcx)
	pcmpeqb	%xmm4, %xmm0
	pmovmskb %xmm0, %rdx
	add	$16, %rcx
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
# endif
	test	%rdx, %rdx
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	jnz	L(CopyFrom1To16BytesUnalignedXmm4)
# else
	jnz	L(CopyFrom1To16Bytes)
# endif

	movaps	16(%rsi, %rcx), %xmm1
	movdqu	%xmm4, (%rdi, %rcx)
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %rdx
	add	$16, %rcx
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
# endif
	test	%rdx, %rdx
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	jnz	L(CopyFrom1To16BytesUnalignedXmm1)
# else
	jnz	L(CopyFrom1To16Bytes)
# endif

	movaps	16(%rsi, %rcx), %xmm2
	movdqu	%xmm1, (%rdi, %rcx)
	pcmpeqb	%xmm2, %xmm0
	pmovmskb %xmm0, %rdx
	add	$16, %rcx
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
# endif
	test	%rdx, %rdx
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	jnz	L(CopyFrom1To16BytesUnalignedXmm2)
# else
	jnz	L(CopyFrom1To16Bytes)
# endif

	movaps	16(%rsi, %rcx), %xmm3
	movdqu	%xmm2, (%rdi, %rcx)
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %rdx
	add	$16, %rcx
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
# endif
	test	%rdx, %rdx
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	jnz	L(CopyFrom1To16BytesUnalignedXmm3)
# else
	jnz	L(CopyFrom1To16Bytes)
# endif

	/* Switch to 64-byte steps: store the pending chunk, round the
	   source down to a 64-byte boundary, and move %rdi (and, for
	   strncpy, %r8) by the same distance so both cursors still
	   address corresponding bytes.  */
	movdqu	%xmm3, (%rdi, %rcx)
	mov	%rsi, %rdx
	lea	16(%rsi, %rcx), %rsi
	and	$-0x40, %rsi
	sub	%rsi, %rdx
	sub	%rdx, %rdi
# ifdef USE_AS_STRNCPY
	lea	128(%r8, %rdx), %r8
# endif
L(Unaligned64Loop):
	/* Load 64 aligned bytes into xmm4..xmm7 (copies kept for the
	   stores) and fold them with pminub: a zero byte in any of the
	   four vectors gives a zero byte in %xmm3, which the compare
	   against the all-zero %xmm0 then detects.  */
	movaps	(%rsi), %xmm2
	movaps	%xmm2, %xmm4
	movaps	16(%rsi), %xmm5
	movaps	32(%rsi), %xmm3
	movaps	%xmm3, %xmm6
	movaps	48(%rsi), %xmm7
	pminub	%xmm5, %xmm2
	pminub	%xmm7, %xmm3
	pminub	%xmm2, %xmm3
	pcmpeqb	%xmm0, %xmm3
	pmovmskb %xmm3, %rdx
# ifdef USE_AS_STRNCPY
	sub	$64, %r8
	jbe	L(UnalignedLeaveCase2OrCase3)
# endif
	test	%rdx, %rdx
	jnz	L(Unaligned64Leave)

L(Unaligned64Loop_start):
	/* Steady state: the stores of the previous 64 bytes are
	   interleaved with the loads and the NUL test of the next 64.  */
	add	$64, %rdi
	add	$64, %rsi
	movdqu	%xmm4, -64(%rdi)
	movaps	(%rsi), %xmm2
	movdqa	%xmm2, %xmm4
	movdqu	%xmm5, -48(%rdi)
	movaps	16(%rsi), %xmm5
	pminub	%xmm5, %xmm2
	movaps	32(%rsi), %xmm3
	movdqu	%xmm6, -32(%rdi)
	movaps	%xmm3, %xmm6
	movdqu	%xmm7, -16(%rdi)
	movaps	48(%rsi), %xmm7
	pminub	%xmm7, %xmm3
	pminub	%xmm2, %xmm3
	pcmpeqb	%xmm0, %xmm3
	pmovmskb %xmm3, %rdx
# ifdef USE_AS_STRNCPY
	sub	$64, %r8
	jbe	L(UnalignedLeaveCase2OrCase3)
# endif
	test	%rdx, %rdx
	jz	L(Unaligned64Loop_start)

L(Unaligned64Leave):
	/* A NUL lies somewhere in xmm4..xmm7; find which 16-byte chunk
	   holds the first one.  */
	pxor	%xmm1, %xmm1

	pcmpeqb	%xmm4, %xmm0
	pcmpeqb	%xmm5, %xmm1
	pmovmskb %xmm0, %rdx
	pmovmskb %xmm1, %rcx
	test	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesUnaligned_0)
	test	%rcx, %rcx
	jnz	L(CopyFrom1To16BytesUnaligned_16)

	pcmpeqb	%xmm6, %xmm0
	pcmpeqb	%xmm7, %xmm1
	pmovmskb %xmm0, %rdx
	pmovmskb %xmm1, %rcx
	test	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesUnaligned_32)

	/* The NUL is in the last chunk (%xmm7); %rdx = its index there.  */
	bsf	%rcx, %rdx
	movdqu	%xmm4, (%rdi)
	movdqu	%xmm5, 16(%rdi)
	movdqu	%xmm6, 32(%rdi)
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
#  ifdef USE_AS_STPCPY
	lea	48(%rdi, %rdx), %rax
#  endif
	movdqu	%xmm7, 48(%rdi)
	add	$15, %r8
	sub	%rdx, %r8
	lea	49(%rdi, %rdx), %rdi
	jmp	L(StrncpyFillTailWithZero)
# else
	add	$48, %rsi
	add	$48, %rdi
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
# endif

/* Source offset within its 64-byte block is at most 32: examine the
   first 32 bytes with unaligned loads straight from (%rsi).  */

L(SourceStringAlignmentLess32):
	pxor	%xmm0, %xmm0
	movdqu	(%rsi), %xmm1
	movdqu	16(%rsi), %xmm2
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %rdx

# ifdef USE_AS_STRNCPY
#  if defined USE_AS_STPCPY || defined USE_AS_STRCAT
	cmp	$16, %r8
#  else
	cmp	$17, %r8
#  endif
	jbe	L(CopyFrom1To16BytesTail1Case2OrCase3)
# endif
	test	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesTail1)

	pcmpeqb	%xmm2, %xmm0
	movdqu	%xmm1, (%rdi)
	pmovmskb %xmm0, %rdx

# ifdef USE_AS_STRNCPY
#  if defined USE_AS_STPCPY || defined USE_AS_STRCAT
	cmp	$32, %r8
#  else
	cmp	$33, %r8
#  endif
	jbe	L(CopyFrom1To32Bytes1Case2OrCase3)
# endif
	test	%rdx, %rdx
	jnz	L(CopyFrom1To32Bytes1)

	/* No NUL in the first 32 bytes: align the source and join the
	   16-byte loop with %rcx = misalignment.  */
	and	$-16, %rsi
	and	$15, %rcx
	jmp	L(Unalign16Both)

/*------ End of the main part with loops ------*/

/* Case1: a NUL was found and (for strncpy) the length budget is not
   the limiting factor.  Each stub turns the NUL mask in %rdx into a
   bit index with bsf, fixes up the cursors, and dispatches through
   L(ExitTable).  */

# if (!defined USE_AS_STRNCPY) || (defined USE_AS_STRCAT)
	.p2align 4
L(CopyFrom1To16Bytes):
	add	%rcx, %rdi
	add	%rcx, %rsi
	bsf	%rdx, %rdx
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
# endif
	.p2align 4
L(CopyFrom1To16BytesTail):
	add	%rcx, %rsi
	bsf	%rdx, %rdx
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)

	.p2align 4
L(CopyFrom1To32Bytes1):
	/* Skip the 16 bytes that were already stored.  */
	add	$16, %rsi
	add	$16, %rdi
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$16, %r8
# endif
L(CopyFrom1To16BytesTail1):
	bsf	%rdx, %rdx
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)

	.p2align 4
L(CopyFrom1To32Bytes):
	/* Table index = NUL index + 16 - %rcx.  */
	bsf	%rdx, %rdx
	add	%rcx, %rsi
	add	$16, %rdx
	sub	%rcx, %rdx
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)

/* Exits from the 64-byte loop.  xmm4..xmm7 hold the four chunks that
   were loaded; the suffix (_0, _16, _32) is the offset of the chunk
   containing the NUL.  In the strncpy (non-strcat) build the chunks are
   stored whole and the rest of the buffer is zero-filled; otherwise the
   final bytes are copied by an Exit stub.  */
	.p2align 4
L(CopyFrom1To16BytesUnaligned_0):
	bsf	%rdx, %rdx
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
#  ifdef USE_AS_STPCPY
	lea	(%rdi, %rdx), %rax
#  endif
	movdqu	%xmm4, (%rdi)
	add	$63, %r8
	sub	%rdx, %r8
	lea	1(%rdi, %rdx), %rdi
	jmp	L(StrncpyFillTailWithZero)
# else
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
# endif

	.p2align 4
L(CopyFrom1To16BytesUnaligned_16):
	bsf	%rcx, %rdx
	movdqu	%xmm4, (%rdi)
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
#  ifdef USE_AS_STPCPY
	lea	16(%rdi, %rdx), %rax
#  endif
	movdqu	%xmm5, 16(%rdi)
	add	$47, %r8
	sub	%rdx, %r8
	lea	17(%rdi, %rdx), %rdi
	jmp	L(StrncpyFillTailWithZero)
# else
	add	$16, %rsi
	add	$16, %rdi
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
# endif

	.p2align 4
L(CopyFrom1To16BytesUnaligned_32):
	bsf	%rdx, %rdx
	movdqu	%xmm4, (%rdi)
	movdqu	%xmm5, 16(%rdi)
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
#  ifdef USE_AS_STPCPY
	lea	32(%rdi, %rdx), %rax
#  endif
	movdqu	%xmm6, 32(%rdi)
	add	$31, %r8
	sub	%rdx, %r8
	lea	33(%rdi, %rdx), %rdi
	jmp	L(StrncpyFillTailWithZero)
# else
	add	$32, %rsi
	add	$32, %rdi
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
# endif

# ifdef USE_AS_STRNCPY
#  ifndef USE_AS_STRCAT
/* Store the chunk named in the label at (%rdi, %rcx), then continue
   at L(CopyFrom1To16BytesXmmExit).  */
	.p2align 4
L(CopyFrom1To16BytesUnalignedXmm6):
	movdqu	%xmm6, (%rdi, %rcx)
	jmp	L(CopyFrom1To16BytesXmmExit)

	.p2align 4
L(CopyFrom1To16BytesUnalignedXmm5):
	movdqu	%xmm5, (%rdi, %rcx)
	jmp	L(CopyFrom1To16BytesXmmExit)

	.p2align 4
L(CopyFrom1To16BytesUnalignedXmm4):
	movdqu	%xmm4, (%rdi, %rcx)
	jmp	L(CopyFrom1To16BytesXmmExit)

	.p2align 4
L(CopyFrom1To16BytesUnalignedXmm3):
	movdqu	%xmm3, (%rdi, %rcx)
	jmp	L(CopyFrom1To16BytesXmmExit)

	.p2align 4
L(CopyFrom1To16BytesUnalignedXmm1):
	movdqu	%xmm1, (%rdi, %rcx)
	jmp	L(CopyFrom1To16BytesXmmExit)
#  endif

	.p2align 4
L(CopyFrom1To16BytesExit):
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)

/* Case2: the length budget ran out in a chunk that also contains a
   NUL.  If the NUL index is below the remaining length %r8 the NUL
   wins (ExitTable); otherwise exactly %r8 bytes are copied
   (ExitStrncpyTable).  */

	.p2align 4
L(CopyFrom1To16BytesCase2):
	add	$16, %r8
	add	%rcx, %rdi
	add	%rcx, %rsi
	bsf	%rdx, %rdx
	cmp	%r8, %rdx
	jb	L(CopyFrom1To16BytesExit)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)

	.p2align 4
L(CopyFrom1To32BytesCase2):
	add	%rcx, %rsi
	bsf	%rdx, %rdx
	add	$16, %rdx
	sub	%rcx, %rdx
	cmp	%r8, %rdx
	jb	L(CopyFrom1To16BytesExit)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)

L(CopyFrom1To16BytesTailCase2):
	add	%rcx, %rsi
	bsf	%rdx, %rdx
	cmp	%r8, %rdx
	jb	L(CopyFrom1To16BytesExit)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)

L(CopyFrom1To16BytesTail1Case2):
	bsf	%rdx, %rdx
	cmp	%r8, %rdx
	jb	L(CopyFrom1To16BytesExit)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)

/* Case2-or-Case3 selectors.  A non-empty NUL mask means Case2; Case3
   (budget exhausted, no NUL in this chunk) copies %r8 bytes through
   ExitStrncpyTable.  */

	.p2align 4
L(CopyFrom1To16BytesCase2OrCase3):
	test	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesCase2)
L(CopyFrom1To16BytesCase3):
	add	$16, %r8
	add	%rcx, %rdi
	add	%rcx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)

	.p2align 4
L(CopyFrom1To32BytesCase2OrCase3):
	test	%rdx, %rdx
	jnz	L(CopyFrom1To32BytesCase2)
	add	%rcx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)

	.p2align 4
L(CopyFrom1To16BytesTailCase2OrCase3):
	test	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesTailCase2)
	add	%rcx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)

	.p2align 4
L(CopyFrom1To32Bytes1Case2OrCase3):
	add	$16, %rdi
	add	$16, %rsi
	sub	$16, %r8
L(CopyFrom1To16BytesTail1Case2OrCase3):
	test	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesTail1Case2)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)

# endif

/*------ Exit stubs for copying the final 1..32 bytes ------*/

/* L(ExitN) copies N bytes from (%rsi) to (%rdi) using loads and stores
   that may overlap.  With USE_AS_STPCPY, %rax = %rdi + N - 1.  In the
   strncpy (non-strcat) build, N is subtracted from %r8 and %rdi is
   advanced by N; lea leaves the flags from that sub intact, so the jnz
   goes on to zero-fill whenever budget remains.
   Several stubs store %dh as the last byte; %dh is presumably zero
   here because %rdx holds the small table index -- confirm against the
   table definition.  */

	.p2align 4
L(Exit1):
	mov	%dh, (%rdi)
# ifdef USE_AS_STPCPY
	lea	(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$1, %r8
	lea	1(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit2):
	mov	(%rsi), %dx
	mov	%dx, (%rdi)
# ifdef USE_AS_STPCPY
	lea	1(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$2, %r8
	lea	2(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit3):
	mov	(%rsi), %cx
	mov	%cx, (%rdi)
	mov	%dh, 2(%rdi)
# ifdef USE_AS_STPCPY
	lea	2(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$3, %r8
	lea	3(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit4):
	mov	(%rsi), %edx
	mov	%edx, (%rdi)
# ifdef USE_AS_STPCPY
	lea	3(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$4, %r8
	lea	4(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit5):
	mov	(%rsi), %ecx
	mov	%dh, 4(%rdi)
	mov	%ecx, (%rdi)
# ifdef USE_AS_STPCPY
	lea	4(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$5, %r8
	lea	5(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit6):
	mov	(%rsi), %ecx
	mov	4(%rsi), %dx
	mov	%ecx, (%rdi)
	mov	%dx, 4(%rdi)
# ifdef USE_AS_STPCPY
	lea	5(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$6, %r8
	lea	6(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit7):
	mov	(%rsi), %ecx
	mov	3(%rsi), %edx
	mov	%ecx, (%rdi)
	mov	%edx, 3(%rdi)
# ifdef USE_AS_STPCPY
	lea	6(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$7, %r8
	lea	7(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit8):
	mov	(%rsi), %rdx
	mov	%rdx, (%rdi)
# ifdef USE_AS_STPCPY
	lea	7(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$8, %r8
	lea	8(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit9):
	mov	(%rsi), %rcx
	mov	%dh, 8(%rdi)
	mov	%rcx, (%rdi)
# ifdef USE_AS_STPCPY
	lea	8(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$9, %r8
	lea	9(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit10):
	mov	(%rsi), %rcx
	mov	8(%rsi), %dx
	mov	%rcx, (%rdi)
	mov	%dx, 8(%rdi)
# ifdef USE_AS_STPCPY
	lea	9(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$10, %r8
	lea	10(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit11):
	mov	(%rsi), %rcx
	mov	7(%rsi), %edx
	mov	%rcx, (%rdi)
	mov	%edx, 7(%rdi)
# ifdef USE_AS_STPCPY
	lea	10(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$11, %r8
	lea	11(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit12):
	mov	(%rsi), %rcx
	mov	8(%rsi), %edx
	mov	%rcx, (%rdi)
	mov	%edx, 8(%rdi)
# ifdef USE_AS_STPCPY
	lea	11(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$12, %r8
	lea	12(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit13):
	mov	(%rsi), %rcx
	mov	5(%rsi), %rdx
	mov	%rcx, (%rdi)
	mov	%rdx, 5(%rdi)
# ifdef USE_AS_STPCPY
	lea	12(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$13, %r8
	lea	13(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit14):
	mov	(%rsi), %rcx
	mov	6(%rsi), %rdx
	mov	%rcx, (%rdi)
	mov	%rdx, 6(%rdi)
# ifdef USE_AS_STPCPY
	lea	13(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$14, %r8
	lea	14(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit15):
	mov	(%rsi), %rcx
	mov	7(%rsi), %rdx
	mov	%rcx, (%rdi)
	mov	%rdx, 7(%rdi)
# ifdef USE_AS_STPCPY
	lea	14(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$15, %r8
	lea	15(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit16):
	movdqu	(%rsi), %xmm0
	movdqu	%xmm0, (%rdi)
# ifdef USE_AS_STPCPY
	lea	15(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$16, %r8
	lea	16(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit17):
	movdqu	(%rsi), %xmm0
	movdqu	%xmm0, (%rdi)
	mov	%dh, 16(%rdi)
# ifdef USE_AS_STPCPY
	lea	16(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$17, %r8
	lea	17(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit18):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %cx
	movdqu	%xmm0, (%rdi)
	mov	%cx, 16(%rdi)
# ifdef USE_AS_STPCPY
	lea	17(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$18, %r8
	lea	18(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit19):
	movdqu	(%rsi), %xmm0
	mov	15(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	mov	%ecx, 15(%rdi)
# ifdef USE_AS_STPCPY
	lea	18(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$19, %r8
	lea	19(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit20):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	mov	%ecx, 16(%rdi)
# ifdef USE_AS_STPCPY
	lea	19(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$20, %r8
	lea	20(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit21):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	mov	%ecx, 16(%rdi)
	mov	%dh, 20(%rdi)
# ifdef USE_AS_STPCPY
	lea	20(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$21, %r8
	lea	21(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit22):
	movdqu	(%rsi), %xmm0
	mov	14(%rsi), %rcx
	movdqu	%xmm0, (%rdi)
	mov	%rcx, 14(%rdi)
# ifdef USE_AS_STPCPY
	lea	21(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$22, %r8
	lea	22(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit23):
	movdqu	(%rsi), %xmm0
	mov	15(%rsi), %rcx
	movdqu	%xmm0, (%rdi)
	mov	%rcx, 15(%rdi)
# ifdef USE_AS_STPCPY
	lea	22(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$23, %r8
	lea	23(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit24):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %rcx
	movdqu	%xmm0, (%rdi)
	mov	%rcx, 16(%rdi)
# ifdef USE_AS_STPCPY
	lea	23(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$24, %r8
	lea	24(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit25):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %rcx
	movdqu	%xmm0, (%rdi)
	mov	%rcx, 16(%rdi)
	mov	%dh, 24(%rdi)
# ifdef USE_AS_STPCPY
	lea	24(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$25, %r8
	lea	25(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit26):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %rdx
	mov	24(%rsi), %cx
	movdqu	%xmm0, (%rdi)
	mov	%rdx, 16(%rdi)
	mov	%cx, 24(%rdi)
# ifdef USE_AS_STPCPY
	lea	25(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$26, %r8
	lea	26(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit27):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %rdx
	mov	23(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	mov	%rdx, 16(%rdi)
	mov	%ecx, 23(%rdi)
# ifdef USE_AS_STPCPY
	lea	26(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$27, %r8
	lea	27(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit28):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %rdx
	mov	24(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	mov	%rdx, 16(%rdi)
	mov	%ecx, 24(%rdi)
# ifdef USE_AS_STPCPY
	lea	27(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$28, %r8
	lea	28(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit29):
	movdqu	(%rsi), %xmm0
	movdqu	13(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 13(%rdi)
# ifdef USE_AS_STPCPY
	lea	28(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$29, %r8
	lea	29(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit30):
	movdqu	(%rsi), %xmm0
	movdqu	14(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 14(%rdi)
# ifdef USE_AS_STPCPY
	lea	29(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$30, %r8
	lea	30(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit31):
	movdqu	(%rsi), %xmm0
	movdqu	15(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 15(%rdi)
# ifdef USE_AS_STPCPY
	lea	30(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$31, %r8
	lea	31(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit32):
	movdqu	(%rsi), %xmm0
	movdqu	16(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 16(%rdi)
# ifdef USE_AS_STPCPY
	lea	31(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$32, %r8
	lea	32(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

# ifdef USE_AS_STRNCPY

/* L(StrncpyExitN) copies exactly N bytes from (%rsi) to (%rdi).  With
   USE_AS_STPCPY, %rax = %rdi + N.  With USE_AS_STRCAT a zero byte is
   also stored at N(%rdi), using %ch as the zero source.  */

	.p2align 4
L(StrncpyExit0):
#  ifdef USE_AS_STPCPY
	mov	%rdi, %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, (%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit1):
	mov	(%rsi), %dl
	mov	%dl, (%rdi)
#  ifdef USE_AS_STPCPY
	lea	1(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 1(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit2):
	mov	(%rsi), %dx
	mov	%dx, (%rdi)
#  ifdef USE_AS_STPCPY
	lea	2(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 2(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit3):
	mov	(%rsi), %cx
	mov	2(%rsi), %dl
	mov	%cx, (%rdi)
	mov	%dl, 2(%rdi)
#  ifdef USE_AS_STPCPY
	lea	3(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 3(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit4):
	mov	(%rsi), %edx
	mov	%edx, (%rdi)
#  ifdef USE_AS_STPCPY
	lea	4(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 4(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit5):
	mov	(%rsi), %ecx
	mov	4(%rsi), %dl
	mov	%ecx, (%rdi)
	mov	%dl, 4(%rdi)
#  ifdef USE_AS_STPCPY
	lea	5(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 5(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit6):
	mov	(%rsi), %ecx
	mov	4(%rsi), %dx
	mov	%ecx, (%rdi)
	mov	%dx, 4(%rdi)
#  ifdef USE_AS_STPCPY
	lea	6(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 6(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit7):
	mov	(%rsi), %ecx
	mov	3(%rsi), %edx
	mov	%ecx, (%rdi)
	mov	%edx, 3(%rdi)
#  ifdef USE_AS_STPCPY
	lea	7(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 7(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit8):
	mov	(%rsi), %rdx
	mov	%rdx, (%rdi)
#  ifdef USE_AS_STPCPY
	lea	8(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 8(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit9):
	mov	(%rsi), %rcx
	mov	8(%rsi), %dl
	mov	%rcx, (%rdi)
	mov	%dl, 8(%rdi)
#  ifdef USE_AS_STPCPY
	lea	9(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 9(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit10):
	mov	(%rsi), %rcx
	mov	8(%rsi), %dx
	mov	%rcx, (%rdi)
	mov	%dx, 8(%rdi)
#  ifdef USE_AS_STPCPY
	lea	10(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 10(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit11):
	mov	(%rsi), %rcx
	mov	7(%rsi), %edx
	mov	%rcx, (%rdi)
	mov	%edx, 7(%rdi)
#  ifdef USE_AS_STPCPY
	lea	11(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 11(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit12):
	mov	(%rsi), %rcx
	mov	8(%rsi), %edx
	mov	%rcx, (%rdi)
	mov	%edx, 8(%rdi)
#  ifdef USE_AS_STPCPY
	lea	12(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 12(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit13):
	mov	(%rsi), %rcx
	mov	5(%rsi), %rdx
	mov	%rcx, (%rdi)
	mov	%rdx, 5(%rdi)
#  ifdef USE_AS_STPCPY
	lea	13(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 13(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit14):
	mov	(%rsi), %rcx
	mov	6(%rsi), %rdx
	mov	%rcx, (%rdi)
	mov	%rdx, 6(%rdi)
#  ifdef USE_AS_STPCPY
	lea	14(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 14(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit15):
	mov	(%rsi), %rcx
	mov	7(%rsi), %rdx
	mov	%rcx, (%rdi)
	mov	%rdx, 7(%rdi)
#  ifdef USE_AS_STPCPY
	lea	15(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 15(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit16):
	movdqu	(%rsi), %xmm0
	movdqu	%xmm0, (%rdi)
#  ifdef USE_AS_STPCPY
	lea	16(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 16(%rdi)
#  endif
	ret

/* L(StrncpyExitN), N = 17..33: copy exactly N bytes from (%rsi) to
   (%rdi) with loads/stores that may overlap.  With USE_AS_STPCPY,
   %rax = %rdi + N (StrncpyExit33 has no such variant).  With
   USE_AS_STRCAT a zero byte is also stored at N(%rdi) via %ch.  */
	.p2align 4
L(StrncpyExit17):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %cl
	movdqu	%xmm0, (%rdi)
	mov	%cl, 16(%rdi)
#  ifdef USE_AS_STPCPY
	lea	17(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 17(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit18):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %cx
	movdqu	%xmm0, (%rdi)
	mov	%cx, 16(%rdi)
#  ifdef USE_AS_STPCPY
	lea	18(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 18(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit19):
	movdqu	(%rsi), %xmm0
	mov	15(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	mov	%ecx, 15(%rdi)
#  ifdef USE_AS_STPCPY
	lea	19(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 19(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit20):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	mov	%ecx, 16(%rdi)
#  ifdef USE_AS_STPCPY
	lea	20(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 20(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit21):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %ecx
	mov	20(%rsi), %dl
	movdqu	%xmm0, (%rdi)
	mov	%ecx, 16(%rdi)
	mov	%dl, 20(%rdi)
#  ifdef USE_AS_STPCPY
	lea	21(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 21(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit22):
	movdqu	(%rsi), %xmm0
	mov	14(%rsi), %rcx
	movdqu	%xmm0, (%rdi)
	mov	%rcx, 14(%rdi)
#  ifdef USE_AS_STPCPY
	lea	22(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 22(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit23):
	movdqu	(%rsi), %xmm0
	mov	15(%rsi), %rcx
	movdqu	%xmm0, (%rdi)
	mov	%rcx, 15(%rdi)
#  ifdef USE_AS_STPCPY
	lea	23(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 23(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit24):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %rcx
	movdqu	%xmm0, (%rdi)
	mov	%rcx, 16(%rdi)
#  ifdef USE_AS_STPCPY
	lea	24(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 24(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit25):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %rdx
	mov	24(%rsi), %cl
	movdqu	%xmm0, (%rdi)
	mov	%rdx, 16(%rdi)
	mov	%cl, 24(%rdi)
#  ifdef USE_AS_STPCPY
	lea	25(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 25(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit26):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %rdx
	mov	24(%rsi), %cx
	movdqu	%xmm0, (%rdi)
	mov	%rdx, 16(%rdi)
	mov	%cx, 24(%rdi)
#  ifdef USE_AS_STPCPY
	lea	26(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 26(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit27):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %rdx
	mov	23(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	mov	%rdx, 16(%rdi)
	mov	%ecx, 23(%rdi)
#  ifdef USE_AS_STPCPY
	lea	27(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 27(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit28):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %rdx
	mov	24(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	mov	%rdx, 16(%rdi)
	mov	%ecx, 24(%rdi)
#  ifdef USE_AS_STPCPY
	lea	28(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 28(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit29):
	movdqu	(%rsi), %xmm0
	movdqu	13(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 13(%rdi)
#  ifdef USE_AS_STPCPY
	lea	29(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 29(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit30):
	movdqu	(%rsi), %xmm0
	movdqu	14(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 14(%rdi)
#  ifdef USE_AS_STPCPY
	lea	30(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 30(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit31):
	movdqu	(%rsi), %xmm0
	movdqu	15(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 15(%rdi)
#  ifdef USE_AS_STPCPY
	lea	31(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 31(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit32):
	movdqu	(%rsi), %xmm0
	movdqu	16(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 16(%rdi)
#  ifdef USE_AS_STPCPY
	lea	32(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 32(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit33):
	movdqu	(%rsi), %xmm0
	movdqu	16(%rsi), %xmm2
	mov	32(%rsi), %cl
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 16(%rdi)
	mov	%cl, 32(%rdi)
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 33(%rdi)
#  endif
	ret

#  ifndef USE_AS_STRCAT

/* L(FillN): store N zero bytes at (%rdi) and return.  The zero comes
   from %rdx / %xmm0, which L(StrncpyFillTailWithZero) clears before
   dispatching here.  Fill3 and Fill7 use one wider store that starts
   at -1(%rdi); that extra byte is presumably already part of the
   written output -- confirm against the callers.  */

	.p2align 4
L(Fill0):
	ret

	.p2align 4
L(Fill1):
	mov	%dl, (%rdi)
	ret

	.p2align 4
L(Fill2):
	mov	%dx, (%rdi)
	ret

	.p2align 4
L(Fill3):
	mov	%edx, -1(%rdi)
	ret

	.p2align 4
L(Fill4):
	mov	%edx, (%rdi)
	ret

	.p2align 4
L(Fill5):
	mov	%edx, (%rdi)
	mov	%dl, 4(%rdi)
	ret

	.p2align 4
L(Fill6):
	mov	%edx, (%rdi)
	mov	%dx, 4(%rdi)
	ret

	.p2align 4
L(Fill7):
	mov	%rdx, -1(%rdi)
	ret

	.p2align 4
L(Fill8):
	mov	%rdx, (%rdi)
	ret

	.p2align 4
L(Fill9):
	mov	%rdx, (%rdi)
	mov	%dl, 8(%rdi)
	ret

/* L(Fill10) .. L(Fill16): store that many zero bytes starting at (%rdi)
   and return.  They are reached through L(FillTable) from the
   L(StrncpyFillTailWithZero) code below, which clears %rdx and %xmm0
   before dispatching.  Sizes that are not a sum of access widths use two
   overlapping stores.  */
	.p2align 4
L(Fill10):
	movq	%rdx, (%rdi)		/* 0..7 */
	movw	%dx, 8(%rdi)		/* 8..9 */
	ret

	.p2align 4
L(Fill11):
	movq	%rdx, (%rdi)		/* 0..7 */
	movl	%edx, 7(%rdi)		/* 7..10 */
	ret

	.p2align 4
L(Fill12):
	movq	%rdx, (%rdi)		/* 0..7 */
	movl	%edx, 8(%rdi)		/* 8..11 */
	ret

	.p2align 4
L(Fill13):
	movq	%rdx, (%rdi)		/* 0..7 */
	movq	%rdx, 5(%rdi)		/* 5..12 */
	ret

	.p2align 4
L(Fill14):
	movq	%rdx, (%rdi)		/* 0..7 */
	movq	%rdx, 6(%rdi)		/* 6..13 */
	ret

/* Fill15 writes 16 bytes starting one byte before %rdi.
   NOTE(review): the byte at -1(%rdi) is presumably already zero on every
   path that gets here -- confirm.  */
	.p2align 4
L(Fill15):
	movdqu	%xmm0, -1(%rdi)		/* covers -1..14 */
	ret

	.p2align 4
L(Fill16):
	movdqu	%xmm0, (%rdi)
	ret

/* Store the chunk held in %xmm2 at (%rdi, %rcx), then run straight on
   into L(CopyFrom1To16BytesXmmExit); there is no branch in between.  */
	.p2align 4
L(CopyFrom1To16BytesUnalignedXmm2):
	movdqu	%xmm2, (%rdi, %rcx)

/* In:  %rdx = bit mask whose lowest set bit gives a byte offset
	       (presumably the pcmpeqb/pmovmskb NUL mask of the chunk
	       just stored -- confirm at the jump sites),
	%rcx = offset of that chunk from %rdi,
	%r8  = length counter kept by the callers.
   Out: %rdi = old %rdi + %rcx + offset + 1,
	%r8  = old %r8 + 15 - offset,
	%rax = old %rdi + %rcx + offset (USE_AS_STPCPY only).
   Control then continues into L(StrncpyFillTailWithZero), which treats
   %r8 as the number of bytes to zero starting at %rdi.  */
	.p2align 4
L(CopyFrom1To16BytesXmmExit):
	bsfq	%rdx, %rdx
	addq	$15, %r8
	addq	%rcx, %rdi
#   ifdef USE_AS_STPCPY
	leaq	(%rdi, %rdx), %rax
#   endif
	subq	%rdx, %r8
	leaq	1(%rdi, %rdx), %rdi

/* Zero %r8 bytes starting at %rdi.  Clobbers %rsi, %rdx, %xmm0 and, via
   the jump-table macro, %r11 and %rcx.

   Up to 16 bytes: dispatch directly to L(Fill<%r8>).
   More than 16:   one unaligned 16-byte store, then %rdi is rounded down
		   to a 16-byte boundary (%r8 is increased by the same
		   amount), 64-byte blocks are written with aligned stores,
		   and the remainder is finished with at most three more
		   aligned stores plus an L(Fill<k>) routine, k <= 16.  */
	.p2align 4
L(StrncpyFillTailWithZero):
	pxor	%xmm0, %xmm0
	xorq	%rdx, %rdx
	subq	$16, %r8
	jbe	L(StrncpyFillExit)	/* count <= 16 */

	movdqu	%xmm0, (%rdi)
	addq	$16, %rdi

	movq	%rdi, %rsi
	andq	$15, %rsi		/* misalignment of %rdi */
	subq	%rsi, %rdi		/* %rdi is now 16-byte aligned */
	addq	%rsi, %r8		/* account for the bytes stepped back */
	subq	$64, %r8
	jb	L(StrncpyFillLess64)

/* Each pass zeroes 64 aligned bytes; %r8 stays biased by -64.  */
L(StrncpyFillLoopMovdqa):
	movdqa	%xmm0, (%rdi)
	movdqa	%xmm0, 16(%rdi)
	movdqa	%xmm0, 32(%rdi)
	movdqa	%xmm0, 48(%rdi)
	addq	$64, %rdi
	subq	$64, %r8
	jae	L(StrncpyFillLoopMovdqa)

/* Here %r8 = remaining - 64, with remaining < 64.  */
L(StrncpyFillLess64):
	addq	$32, %r8		/* remaining - 32 */
	jl	L(StrncpyFillLess32)
	movdqa	%xmm0, (%rdi)
	movdqa	%xmm0, 16(%rdi)
	addq	$32, %rdi
	subq	$16, %r8		/* (remaining - 32) - 16 */
	jl	L(StrncpyFillExit)
	movdqa	%xmm0, (%rdi)
	addq	$16, %rdi
	BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4)

/* Here %r8 = remaining - 32, with remaining < 32.  */
L(StrncpyFillLess32):
	addq	$16, %r8		/* remaining - 16 */
	jl	L(StrncpyFillExit)
	movdqa	%xmm0, (%rdi)
	addq	$16, %rdi
	BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4)

/* Here %r8 = (bytes still to zero) - 16; undo the bias and dispatch.  */
L(StrncpyFillExit):
	addq	$16, %r8
	BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4)

/* end of ifndef USE_AS_STRCAT */
#  endif

/* Leave path for a group of four 16-byte chunks kept in %xmm4..%xmm7
   (presumably loaded by the 64-byte loop, which is not part of this
   section -- confirm).  A non-zero %rdx selects Case2, otherwise control
   continues into Case3.
   NOTE(review): %rdx is presumably the combined NUL mask of the group.  */
	.p2align 4
L(UnalignedLeaveCase2OrCase3):
	testq	%rdx, %rdx
	jnz	L(Unaligned64LeaveCase2)

/* Case3 (%rdx == 0): store the chunks one at a time while the signed
   counter in %r8 allows it, otherwise hand over to
   L(CopyFrom1To16BytesCase3) with %rcx = (%r8 + 64) rounded down to a
   multiple of 16.  If all four chunks are stored, return with
   %rax = %rdi + 64 (USE_AS_STPCPY) and a zero byte at 64(%rdi)
   (USE_AS_STRCAT).  */
L(Unaligned64LeaveCase3):
	leaq	64(%r8), %rcx
	andq	$-16, %rcx
	addq	$48, %r8
	jl	L(CopyFrom1To16BytesCase3)
	movdqu	%xmm4, (%rdi)
	subq	$16, %r8
	jb	L(CopyFrom1To16BytesCase3)
	movdqu	%xmm5, 16(%rdi)
	subq	$16, %r8
	jb	L(CopyFrom1To16BytesCase3)
	movdqu	%xmm6, 32(%rdi)
	subq	$16, %r8
	jb	L(CopyFrom1To16BytesCase3)
	movdqu	%xmm7, 48(%rdi)
#  ifdef USE_AS_STPCPY
	leaq	64(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xorb	%ch, %ch
	movb	%ch, 64(%rdi)
#  endif
	ret

/* Case2 (%rdx != 0): examine %xmm4, %xmm5, %xmm6, %xmm7 in turn.  For
   each chunk a byte mask is built by comparing it with %xmm0; %xmm0 is
   presumably all-zero on entry (confirm at the caller), and it is again
   all-zero whenever the previous mask was empty, so the masks mark NUL
   bytes.  %rcx tracks the offset of the chunk under test (0, 16, 32).
   A chunk is stored only after the scan has moved past it.  */
	.p2align 4
L(Unaligned64LeaveCase2):
	xorq	%rcx, %rcx
	pcmpeqb	%xmm4, %xmm0
	pmovmskb %xmm0, %rdx
	addq	$48, %r8
	jle	L(CopyFrom1To16BytesCase2OrCase3)
	testq	%rdx, %rdx
#  ifndef USE_AS_STRCAT
	jnz	L(CopyFrom1To16BytesUnalignedXmm4)
#  else
	jnz	L(CopyFrom1To16Bytes)
#  endif
	pcmpeqb	%xmm5, %xmm0
	pmovmskb %xmm0, %rdx
	movdqu	%xmm4, (%rdi)
	addq	$16, %rcx
	subq	$16, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	testq	%rdx, %rdx
#  ifndef USE_AS_STRCAT
	jnz	L(CopyFrom1To16BytesUnalignedXmm5)
#  else
	jnz	L(CopyFrom1To16Bytes)
#  endif

	pcmpeqb	%xmm6, %xmm0
	pmovmskb %xmm0, %rdx
	movdqu	%xmm5, 16(%rdi)
	addq	$16, %rcx
	subq	$16, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	testq	%rdx, %rdx
#  ifndef USE_AS_STRCAT
	jnz	L(CopyFrom1To16BytesUnalignedXmm6)
#  else
	jnz	L(CopyFrom1To16Bytes)
#  endif

	/* Last chunk.  %rcx is 32 here, so both pointers advance by 48 to
	   the start of the %xmm7 chunk.  If the marked byte lies below the
	   counter in %r8 take the ordinary exit, otherwise copy exactly
	   %r8 bytes through the strncpy exit table.  */
	pcmpeqb	%xmm7, %xmm0
	pmovmskb %xmm0, %rdx
	movdqu	%xmm6, 32(%rdi)
	leaq	16(%rdi, %rcx), %rdi
	leaq	16(%rsi, %rcx), %rsi
	bsfq	%rdx, %rdx
	cmpq	%r8, %rdx
	jb	L(CopyFrom1To16BytesExit)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)

/* Return without copying anything; the strncpy entry code branches here
   when the length argument is zero.  Outside the strcat build the
   destination pointer is returned in %rax.  */
	.p2align 4
L(ExitZero):
#  ifndef USE_AS_STRCAT
	movq	%rdi, %rax
#  endif
	ret

# endif

# ifndef USE_AS_STRCAT
END (STRCPY)
# else
END (STRCAT)
# endif
	.p2align 4
	.section .rodata
/* Jump tables.  Every entry is a 32-bit offset of the target label from
   the start of its own table (see JMPTBL); BRANCH_TO_JMPTBL_ENTRY adds
   the table address back before jumping.

   L(ExitTable): entry i (counting from 0) leads to L(Exit<i + 1>).  */
L(ExitTable):
	.int	JMPTBL(L(Exit1), L(ExitTable))
	.int	JMPTBL(L(Exit2), L(ExitTable))
	.int	JMPTBL(L(Exit3), L(ExitTable))
	.int	JMPTBL(L(Exit4), L(ExitTable))
	.int	JMPTBL(L(Exit5), L(ExitTable))
	.int	JMPTBL(L(Exit6), L(ExitTable))
	.int	JMPTBL(L(Exit7), L(ExitTable))
	.int	JMPTBL(L(Exit8), L(ExitTable))
	.int	JMPTBL(L(Exit9), L(ExitTable))
	.int	JMPTBL(L(Exit10), L(ExitTable))
	.int	JMPTBL(L(Exit11), L(ExitTable))
	.int	JMPTBL(L(Exit12), L(ExitTable))
	.int	JMPTBL(L(Exit13), L(ExitTable))
	.int	JMPTBL(L(Exit14), L(ExitTable))
	.int	JMPTBL(L(Exit15), L(ExitTable))
	.int	JMPTBL(L(Exit16), L(ExitTable))
	.int	JMPTBL(L(Exit17), L(ExitTable))
	.int	JMPTBL(L(Exit18), L(ExitTable))
	.int	JMPTBL(L(Exit19), L(ExitTable))
	.int	JMPTBL(L(Exit20), L(ExitTable))
	.int	JMPTBL(L(Exit21), L(ExitTable))
	.int	JMPTBL(L(Exit22), L(ExitTable))
	.int	JMPTBL(L(Exit23), L(ExitTable))
	.int	JMPTBL(L(Exit24), L(ExitTable))
	.int	JMPTBL(L(Exit25), L(ExitTable))
	.int	JMPTBL(L(Exit26), L(ExitTable))
	.int	JMPTBL(L(Exit27), L(ExitTable))
	.int	JMPTBL(L(Exit28), L(ExitTable))
	.int	JMPTBL(L(Exit29), L(ExitTable))
	.int	JMPTBL(L(Exit30), L(ExitTable))
	.int	JMPTBL(L(Exit31), L(ExitTable))
	.int	JMPTBL(L(Exit32), L(ExitTable))
# ifdef USE_AS_STRNCPY
/* L(ExitStrncpyTable): entry i leads to L(StrncpyExit<i>), i = 0..33.  */
L(ExitStrncpyTable):
	.int	JMPTBL(L(StrncpyExit0), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit1), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit2), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit3), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit4), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit5), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit6), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit7), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit8), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit9), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit10), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit11), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit12), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit13), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit14), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit15), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit16), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit17), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit18), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit19), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit20), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit21), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit22), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit23), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit24), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit25), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit26), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit27), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit28), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit29), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit30), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit31), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit32), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit33), L(ExitStrncpyTable))
#  ifndef USE_AS_STRCAT
/* L(FillTable): entry i leads to L(Fill<i>), i = 0..16.  */
	.p2align 4
L(FillTable):
	.int	JMPTBL(L(Fill0), L(FillTable))
	.int	JMPTBL(L(Fill1), L(FillTable))
	.int	JMPTBL(L(Fill2), L(FillTable))
	.int	JMPTBL(L(Fill3), L(FillTable))
	.int	JMPTBL(L(Fill4), L(FillTable))
	.int	JMPTBL(L(Fill5), L(FillTable))
	.int	JMPTBL(L(Fill6), L(FillTable))
	.int	JMPTBL(L(Fill7), L(FillTable))
	.int	JMPTBL(L(Fill8), L(FillTable))
	.int	JMPTBL(L(Fill9), L(FillTable))
	.int	JMPTBL(L(Fill10), L(FillTable))
	.int	JMPTBL(L(Fill11), L(FillTable))
	.int	JMPTBL(L(Fill12), L(FillTable))
	.int	JMPTBL(L(Fill13), L(FillTable))
	.int	JMPTBL(L(Fill14), L(FillTable))
	.int	JMPTBL(L(Fill15), L(FillTable))
	.int	JMPTBL(L(Fill16), L(FillTable))
#  endif
# endif
#endif