1/* Optimized rawmemchr implementation for PowerPC64/POWER9.
2   Copyright (C) 2020-2021 Free Software Foundation, Inc.
3   This file is part of the GNU C Library.
4
5   The GNU C Library is free software; you can redistribute it and/or
6   modify it under the terms of the GNU Lesser General Public
7   License as published by the Free Software Foundation; either
8   version 2.1 of the License, or (at your option) any later version.
9
10   The GNU C Library is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13   Lesser General Public License for more details.
14
15   You should have received a copy of the GNU Lesser General Public
16   License along with the GNU C Library; if not, see
17   <https://www.gnu.org/licenses/>.  */
18
19#include <sysdep.h>
20
21#ifndef RAWMEMCHR
22# define RAWMEMCHR __rawmemchr
23#endif
24
25/* Implements the function
26
27   void * [r3] rawmemchr (void *s [r3], int c [r4])
28
29   The implementation can load bytes past a matching byte, but only
30   up to the next 16B boundary, so it never crosses a page.  */
31
32.machine power9	/* accept POWER9 instructions (lxv, vctzlsbb)  */
33ENTRY_TOCLESS (RAWMEMCHR, 4)
34	CALL_MCOUNT 2
35
36	xori	r5,r4,0xff	/* low byte of r5 = ~(low byte of c): never equals c  */
37
38	mtvsrd	v18+32,r4	/* matching char in v18  */
39	mtvsrd	v19+32,r5	/* non matching char in v19  */
40
41	vspltb	v18,v18,7	/* replicate into all 16 bytes of v18  */
42	vspltb	v19,v19,7	/* replicate into all 16 bytes of v19  */
43
44	neg	r5,r3		/* r5 = -s  */
45	rldicl	r9,r5,0,60	/* r9 = (-s) & 15: bytes needed to get source 16B aligned  */
46
47	/* Align data and fill bytes not loaded with non matching char  */
48	lvx	v0,0,r3		/* load the 16B-aligned block that contains s  */
49	lvsr	v1,0,r3		/* permute control built from the low 4 bits of s  */
50	vperm	v0,v19,v0,v1	/* mix loaded bytes (v0) with filler bytes (v19)  */
51
52	vcmpequb. v6,v0,v18	/* 0xff if byte matches, 0x00 otherwise  */
53	beq	cr6,L(aligned)	/* no byte matched: go on with aligned blocks  */
54
55	vctzlsbb r0,v6		/* r0 = offset of the first matching byte  */
56	add	r3,r3,r0	/* result = s + offset  */
57	blr
58
59L(aligned):
60	add	r3,r3,r9	/* round s up to a 16B boundary (r9 == 0 if already aligned)  */
61
62L(loop):	/* 64B per iteration; each 16B block is checked before the next load  */
63	lxv	v0+32,0(r3)	/* block at offset 0  */
64	vcmpequb. v6,v0,v18	/* 0xff if byte matches, 0x00 otherwise  */
65	bne	cr6,L(tail1)	/* at least one byte matched  */
66
67	lxv	v0+32,16(r3)	/* block at offset 16  */
68	vcmpequb. v6,v0,v18	/* 0xff if byte matches, 0x00 otherwise  */
69	bne	cr6,L(tail2)	/* at least one byte matched  */
70
71	lxv	v0+32,32(r3)	/* block at offset 32  */
72	vcmpequb. v6,v0,v18	/* 0xff if byte matches, 0x00 otherwise  */
73	bne	cr6,L(tail3)	/* at least one byte matched  */
74
75	lxv	v0+32,48(r3)	/* block at offset 48  */
76	vcmpequb. v6,v0,v18	/* 0xff if byte matches, 0x00 otherwise  */
77	bne	cr6,L(tail4)	/* at least one byte matched  */
78
79	addi	r3,r3,64	/* nothing in these 64 bytes: advance to the next 64  */
80	b	L(loop)
81
82L(tail1):	/* match in the block at offset 0  */
83	vctzlsbb r0,v6		/* r0 = offset of the first match inside the block  */
84	add	r3,r3,r0	/* result = block address + offset  */
85	blr
86
87L(tail2):	/* match in the block at offset 16  */
88	vctzlsbb r0,v6		/* r0 = offset of the first match inside the block  */
89	add	r3,r3,r0
90	addi	r3,r3,16	/* account for the block's offset from r3  */
91	blr
92
93L(tail3):	/* match in the block at offset 32  */
94	vctzlsbb r0,v6		/* r0 = offset of the first match inside the block  */
95	add	r3,r3,r0
96	addi	r3,r3,32	/* account for the block's offset from r3  */
97	blr
98
99L(tail4):	/* match in the block at offset 48  */
100	vctzlsbb r0,v6		/* r0 = offset of the first match inside the block  */
101	add	r3,r3,r0
102	addi	r3,r3,48	/* account for the block's offset from r3  */
103	blr
104
105END (RAWMEMCHR)
106weak_alias (__rawmemchr,rawmemchr)	/* NOTE(review): names __rawmemchr literally, not RAWMEMCHR -- confirm intended  */
107libc_hidden_builtin_def (__rawmemchr)
108