1/* Optimized strchr implementation for PowerPC.
2   Copyright (C) 1997-2021 Free Software Foundation, Inc.
3   This file is part of the GNU C Library.
4
5   The GNU C Library is free software; you can redistribute it and/or
6   modify it under the terms of the GNU Lesser General Public
7   License as published by the Free Software Foundation; either
8   version 2.1 of the License, or (at your option) any later version.
9
10   The GNU C Library is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13   Lesser General Public License for more details.
14
15   You should have received a copy of the GNU Lesser General Public
16   License along with the GNU C Library; if not, see
17   <https://www.gnu.org/licenses/>.  */
18
19#include <sysdep.h>
20
21/* See strlen.S for comments on how this works.  */
22
23/* char * [r3] strchr (const char *s [r3] , int c [r4] )  */
24
25ENTRY (strchr)
/* Locate the first byte equal to (c & 0xff) in the NUL-terminated string s,
   examining one aligned 32-bit word at a time.

   In:   r3 = s, r4 = c.
   Out:  r3 = address of the first matching byte, or 0 if a NUL byte is
	 reached before any match.  When c is 0 the address of the
	 terminating NUL itself is returned.
   Uses: r0 and r4-r12 as scratch, plus CR0.  The code below neither reads
	 nor writes the stack.

   Method: c is replicated into all four bytes of rCHR, so rWORD ^ rCHR has
   a zero byte exactly where rWORD holds c.  "x contains a zero byte" is
   then tested for both x = rWORD (end of string) and x = rWORD ^ rCHR
   (match) with
	(x - 0x01010101) & ~x & 0x80808080
   which is nonzero iff x contains a zero byte.  The set bits of that value
   are reliable only up to the least significant zero byte (a borrow can
   mark the byte above it falsely), which is why the big-endian tail
   recomputes exact masks before locating the match.

   The first word is loaded from s rounded down to a word boundary; the
   bytes that precede s are forced to 0xff so that they can neither end the
   string nor match c.  */
26
27#define rTMP1	r0	/* scratch: x - 0x01010101 half of a zero-byte test */
28#define rRTN	r3	/* outgoing result */
29#define rSTR	r8	/* current word pointer */
30#define rCHR	r4	/* byte we're looking for, spread over the whole word */
31#define rWORD	r5	/* the current word */
32#define rCLZB	rCHR	/* leading zero byte count (reuses r4; c is dead by then) */
33#define rFEFE	r6	/* constant 0xfefefeff (-0x01010101) */
34#define r7F7F	r7	/* constant 0x7f7f7f7f */
35#define rTMP2	r9	/* scratch: ~x & 0x80808080 half of a zero-byte test */
36#define rIGN	r10	/* number of bits we should ignore in the first word */
37#define rMASK	r11	/* mask with the bits to ignore set to 0 */
38#define rTMP3	r12	/* rWORD ^ rCHR: zero byte wherever rWORD holds c */
39#define rTMP4	rIGN	/* zero-byte test of rWORD (shares r10 with rIGN) */
40#define rTMP5	rMASK	/* zero-byte test of rTMP3 (shares r11 with rMASK) */
41
42
43	rlwimi	rCHR, rCHR, 8, 16, 23	/* Low halfword of rCHR = c:c.  */
44	li	rMASK, -1		/* All ones; shifted into the ignore mask below.  */
45	rlwimi	rCHR, rCHR, 16, 0, 15	/* High halfword = low halfword: c in all 4 bytes.  */
46	rlwinm	rIGN, rRTN, 3, 27, 28	/* rIGN = (s & 3) * 8.  */
47	lis	rFEFE, -0x101		/* rFEFE = 0xfeff0000 for now.  */
48	lis	r7F7F, 0x7f7f		/* r7F7F = 0x7f7f0000 for now.  */
49	clrrwi	rSTR, rRTN, 2		/* rSTR = s rounded down to a word boundary.  */
50	addi	rFEFE, rFEFE, -0x101	/* rFEFE = 0xfeff0000 - 0x101 = 0xfefefeff.  */
51	addi	r7F7F, r7F7F, 0x7f7f	/* r7F7F = 0x7f7f7f7f.  */
52/* Test the first (partial?) word.  */
53	lwz	rWORD, 0(rSTR)		/* Aligned load; may include bytes before s.  */
54#ifdef __LITTLE_ENDIAN__
55	slw	rMASK, rMASK, rIGN	/* Bytes before s sit in the low-order bits.  */
56#else
57	srw	rMASK, rMASK, rIGN	/* Bytes before s sit in the high-order bits.  */
58#endif
59	orc	rWORD, rWORD, rMASK	/* Force the bytes before s to 0xff: never NUL.  */
60	add	rTMP1, rFEFE, rWORD
61	nor	rTMP2, r7F7F, rWORD
62	and.	rTMP4, rTMP1, rTMP2	/* Nonzero (CR0 "ne") iff rWORD holds a NUL.  */
63	xor	rTMP3, rCHR, rWORD
64	orc	rTMP3, rTMP3, rMASK	/* Bytes before s must not match either
					   (they would if c == 0xff).  */
65	b	L(loopentry)
66
67/* The loop.  */
/* Each pass through L(loop) is entered only when the word in rWORD has no
   NUL, so the string extends into the next word.  The load of that next
   word is issued first, and only then is the c-match test of the previous
   word completed (rTMP1/rTMP2 were prepared at L(loopentry)).  On a match
   rSTR is therefore already 4 bytes past the matching word; L(foundit)
   compensates for that.  */
68
69L(loop):
70	lwzu	rWORD, 4(rSTR)		/* rSTR += 4; rWORD = word at the new rSTR.  */
71	and.	rTMP5, rTMP1, rTMP2	/* Nonzero iff the previous word contains c.  */
72/* Test for 0.	*/
73	add	rTMP1, rFEFE, rWORD /* x - 0x01010101.  */
74	nor	rTMP2, r7F7F, rWORD /* ~(x | 0x7f7f7f7f) == ~x & 0x80808080.  */
75	bne	L(foundit)		/* Previous word (which had no NUL) holds c.  */
76	and.	rTMP4, rTMP1, rTMP2 /* (x - 0x01010101) & ~x & 0x80808080.  */
77/* Start test for the bytes we're looking for.  */
78	xor	rTMP3, rCHR, rWORD
79L(loopentry):
80	add	rTMP1, rFEFE, rTMP3
81	nor	rTMP2, r7F7F, rTMP3
82	beq	L(loop)			/* CR0 is still the NUL test: no NUL, go on.  */
83
84/* There is a zero byte in the word, but may also be a matching byte (either
85   before or after the zero byte).  In fact, we may be looking for a
86   zero byte, in which case we return a match.  */
87	and.	rTMP5, rTMP1, rTMP2	/* Nonzero iff this word also contains c.  */
88	li	rRTN, 0			/* Preload NULL for the early returns below.  */
89	beqlr				/* A NUL but no c in this word: return NULL.  */
90/* At this point:
91   rTMP5 bytes are 0x80 for each match of c, 0 otherwise.
92   rTMP4 bytes are 0x80 for each match of 0, 0 otherwise.
93   But there may be false matches in the next most significant byte from
94   a true match due to carries.  This means we need to recalculate the
95   matches using a longer method for big-endian.  */
96#ifdef __LITTLE_ENDIAN__
/* Little-endian: the first byte in memory is the least significant one, so
   the lowest set bit of each test result is a true hit.  (v - 1) & ~v
   yields ones in every bit below the lowest set bit of v.  */
97	addi	rTMP1, rTMP5, -1
98	andc	rTMP1, rTMP1, rTMP5	/* Ones below the first match of c.  */
99	cntlzw	rCLZB, rTMP1		/* 25 - 8 * (byte index of that match).  */
100	addi	rTMP2, rTMP4, -1
101	andc	rTMP2, rTMP2, rTMP4	/* Ones below the first NUL.  */
102	cmplw	rTMP1, rTMP2
103	bgtlr				/* First c lies after the first NUL: NULL.  */
104	subfic	rCLZB, rCLZB, 32-7	/* 25 - clz = 8 * byte index of the match.  */
105#else
106/* I think we could reduce this by two instructions by keeping the "nor"
107   results from the loop for reuse here.  See strlen.S tail.  Similarly
108   one instruction could be pruned from L(foundit).  */
/* Big-endian: recompute both masks exactly with
	~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f)
   which is 0x80 in every zero byte of x and 0 elsewhere; the addition
   cannot carry from one byte into the next.  rFEFE, rTMP4 and rTMP5 are
   plain scratch registers from here on.  */
109	and	rFEFE, r7F7F, rWORD
110	or	rTMP5, r7F7F, rWORD
111	and	rTMP1, r7F7F, rTMP3
112	or	rTMP4, r7F7F, rTMP3
113	add	rFEFE, rFEFE, r7F7F
114	add	rTMP1, rTMP1, r7F7F
115	nor	rWORD, rTMP5, rFEFE	/* rWORD = exact mask of NUL bytes.  */
116	nor	rTMP2, rTMP4, rTMP1	/* rTMP2 = exact mask of bytes equal to c.  */
117	cntlzw	rCLZB, rTMP2		/* 8 * byte index of the first match.  */
118	cmplw	rWORD, rTMP2
119	bgtlr				/* First NUL precedes the first c: NULL.  */
120#endif
121	srwi	rCLZB, rCLZB, 3		/* Bit offset -> byte offset within the word.  */
122	add	rRTN, rSTR, rCLZB
123	blr
124
/* Reached from the loop: the word before the one rSTR now addresses
   contains c and no NUL.  rTMP3 and rTMP5 still describe that earlier word
   because the branch is taken before either is recomputed.  */
125L(foundit):
126#ifdef __LITTLE_ENDIAN__
127	addi	rTMP1, rTMP5, -1
128	andc	rTMP1, rTMP1, rTMP5	/* Ones below the first match of c.  */
129	cntlzw	rCLZB, rTMP1
130	subfic	rCLZB, rCLZB, 32-7-32	/* (25 - clz) - 32: bit offset relative to
					   the already advanced rSTR.  */
131	srawi	rCLZB, rCLZB, 3		/* Arithmetic shift: offset is -4 .. -1.  */
132#else
133	and	rTMP1, r7F7F, rTMP3
134	or	rTMP4, r7F7F, rTMP3
135	add	rTMP1, rTMP1, r7F7F
136	nor	rTMP2, rTMP4, rTMP1	/* Exact mask of bytes equal to c.  */
137	cntlzw	rCLZB, rTMP2		/* 8 * byte index of the first match.  */
138	subi	rSTR, rSTR, 4		/* Undo the advance made by the loop's lwzu.  */
139	srwi	rCLZB, rCLZB, 3
140#endif
141	add	rRTN, rSTR, rCLZB
142	blr
143END (strchr)
144
/* NOTE(review): both macros are defined outside this file.  Presumably the
   first exports `index' as a weak alias for strchr and the second provides
   the hidden definition used for calls from inside the library -- confirm
   against the macro definitions.  */
145weak_alias (strchr, index)
146libc_hidden_builtin_def (strchr)
147