/* Optimized strncmp implementation for PowerPC32.
2   Copyright (C) 2003-2021 Free Software Foundation, Inc.
3   This file is part of the GNU C Library.
4
5   The GNU C Library is free software; you can redistribute it and/or
6   modify it under the terms of the GNU Lesser General Public
7   License as published by the Free Software Foundation; either
8   version 2.1 of the License, or (at your option) any later version.
9
10   The GNU C Library is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13   Lesser General Public License for more details.
14
15   You should have received a copy of the GNU Lesser General Public
16   License along with the GNU C Library; if not, see
17   <https://www.gnu.org/licenses/>.  */
18
19#include <sysdep.h>
20
21/* See strlen.s for comments on how the end-of-string testing works.  */
22
23/* int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5])  */
24
/* strncmp -- compare at most SIZE bytes of the strings S1 and S2.

   Entry:   r3 = s1, r4 = s2, r5 = size.
   Return:  r3 = 0 when no difference is found within SIZE bytes or
	    before a terminating NUL; otherwise a nonzero value whose
	    sign gives the order of the first differing bytes, taken
	    as unsigned chars.
   Scratch: r0 and r4-r12, CTR, CR0 and CR1 are overwritten.  Memory
	    is only read, and only through r3 and r4.

   Strategy: when both pointers are word aligned, SIZE / 4 whole words
   are compared in a word loop and the remaining SIZE % 4 bytes in a
   byte loop; otherwise the byte loop handles all SIZE bytes.

   Zero-byte test used by the word loop: for a word w,
	(w + 0xfefefeff) & ~(w | 0x7f7f7f7f)
   is nonzero exactly when some byte of w is zero.  */
EALIGN (strncmp, 4, 0)

#define rTMP2	r0	/* scratch, little-endian paths only */
#define rRTN	r3	/* return value; same register as rSTR1 */
#define rSTR1	r3	/* first string arg */
#define rSTR2	r4	/* second string arg */
#define rN	r5	/* max string length */
#define rWORD1	r6	/* current word (or byte) in s1 */
#define rWORD2	r7	/* current word (or byte) in s2 */
#define rWORD3  r10	/* byte loop only: alternate s1 byte (shares r10 with rNEG) */
#define rWORD4  r11	/* byte loop only: alternate s2 byte (shares r11 with rBITDIF) */
#define rFEFE	r8	/* constant 0xfefefeff (-0x01010101) */
#define r7F7F	r9	/* constant 0x7f7f7f7f */
#define rNEG	r10	/* ~(word in s1 | 0x7f7f7f7f) */
#define rBITDIF	r11	/* bits that differ in s1 & s2 words */
#define rTMP	r12

	dcbt	0,rSTR1			/* Cache touch hint for s1.  */
	or	rTMP, rSTR2, rSTR1
	lis	r7F7F, 0x7f7f		/* Upper half of 0x7f7f7f7f.  */
	dcbt	0,rSTR2			/* Cache touch hint for s2.  */
	clrlwi.	rTMP, rTMP, 30		/* cr0: low two bits of (s1 | s2).  */
	cmplwi	cr1, rN, 0		/* cr1: size == 0, tested at L(unaligned).  */
	lis	rFEFE, -0x101		/* Upper half of 0xfefefeff.  */
	bne	L(unaligned)		/* Either pointer misaligned: bytes only.  */
/* Both pointers are word aligned, so set up for two loops: first a
   word loop, then fall into the byte loop for any residual bytes.  */
	srwi.	rTMP, rN, 2		/* Number of whole words; cr0 set if zero.  */
	clrlwi	rN, rN, 30		/* Residual byte count (size % 4).  */
	addi	rFEFE, rFEFE, -0x101	/* rFEFE = 0xfefefeff.  */
	addi	r7F7F, r7F7F, 0x7f7f	/* r7F7F = 0x7f7f7f7f.  */
	cmplwi	cr1, rN, 0		/* cr1: residual == 0.  */
	beq	L(unaligned)		/* size < 4: no whole word, rN == size.  */

	mtctr	rTMP	/* Power4 wants mtctr 1st in dispatch group.  */
	lwz	rWORD1, 0(rSTR1)
	lwz	rWORD2, 0(rSTR2)
	b	L(g1)

/* Word loop.  On entry to L(g0), cr1 holds the comparison of the words
   that were just checked for a NUL; the next s1 word is loaded before
   that comparison is acted upon.  */
L(g0):
	lwzu	rWORD1, 4(rSTR1)
	bne-	cr1, L(different)	/* Previous words differed.  */
	lwzu	rWORD2, 4(rSTR2)
L(g1):	add	rTMP, rFEFE, rWORD1
	nor	rNEG, r7F7F, rWORD1
	bdz	L(tail)			/* This is the last whole word.  */
	and.	rTMP, rTMP, rNEG	/* Nonzero iff rWORD1 has a zero byte.  */
	cmpw	cr1, rWORD1, rWORD2
	beq+	L(g0)

/* OK. We've hit the end of the string. We need to be careful that
   we don't compare two strings as different because of gunk beyond
   the end of the strings...  */

#ifdef __LITTLE_ENDIAN__
L(endstring):
	/* rTMP has bit 7 set in the first (least significant) zero byte.
	   Build a mask of every bit up to and including that byte; a
	   zero byte in the top position yields an all-ones mask.  */
	slwi	rTMP, rTMP, 1
	addi    rTMP2, rTMP, -1
	andc    rTMP2, rTMP2, rTMP
	and	rWORD2, rWORD2, rTMP2		/* Mask off gunk.  */
	and	rWORD1, rWORD1, rTMP2
	/* Byte reverse both words using 32-bit rotates only: rotate left
	   by 8, then patch the top byte and the third byte from a rotate
	   left by 24.  */
	rlwinm	rTMP2, rWORD2, 8, 0xffffffff	/* Byte reverse word.  */
	rlwinm	rTMP, rWORD1, 8, 0xffffffff
	rlwimi	rTMP2, rWORD2, 24, 0, 7
	rlwimi	rTMP, rWORD1, 24, 0, 7
	rlwimi	rTMP2, rWORD2, 24, 16, 23
	rlwimi	rTMP, rWORD1, 24, 16, 23
	xor.	rBITDIF, rTMP, rTMP2
	sub	rRTN, rTMP, rTMP2
	bgelr+				/* Top bits agree: the difference is the result.  */
	ori	rRTN, rTMP2, 1		/* Top bits differ: take the sign from s2's word.  */
	blr

L(different):
	/* rSTR1 has already been advanced past the differing word and
	   rWORD1 overwritten, so reload it; rWORD2 is still valid.  */
	lwz	rWORD1, -4(rSTR1)
	rlwinm	rTMP2, rWORD2, 8, 0xffffffff	/* Byte reverse word.  */
	rlwinm	rTMP, rWORD1, 8, 0xffffffff
	rlwimi	rTMP2, rWORD2, 24, 0, 7
	rlwimi	rTMP, rWORD1, 24, 0, 7
	rlwimi	rTMP2, rWORD2, 24, 16, 23
	rlwimi	rTMP, rWORD1, 24, 16, 23
	xor.	rBITDIF, rTMP, rTMP2
	sub	rRTN, rTMP, rTMP2
	bgelr+
	ori	rRTN, rTMP2, 1
	blr

#else
L(endstring):
	and	rTMP, r7F7F, rWORD1
	beq	cr1, L(equal)		/* Words identical, NUL included.  */
	add	rTMP, rTMP, r7F7F
	xor.	rBITDIF, rWORD1, rWORD2
	andc	rNEG, rNEG, rTMP	/* 0x80 in exactly the zero bytes of rWORD1.  */
	blt-	L(highbit)		/* Words differ in their top bit.  */
	cntlzw	rBITDIF, rBITDIF	/* Index of the first differing bit.  */
	cntlzw	rNEG, rNEG		/* Index of the first NUL byte's top bit...  */
	addi	rNEG, rNEG, 7		/* ...moved to that byte's last bit.  */
	cmpw	cr1, rNEG, rBITDIF
	sub	rRTN, rWORD1, rWORD2
	bgelr+	cr1			/* Difference is at or before the NUL.  */
L(equal):
	li	rRTN, 0			/* Difference lies beyond the NUL: equal.  */
	blr

L(different):
	/* rSTR1 has already been advanced past the differing word and
	   rWORD1 overwritten, so reload it; rWORD2 is still valid.  */
	lwz	rWORD1, -4(rSTR1)
	xor.	rBITDIF, rWORD1, rWORD2
	sub	rRTN, rWORD1, rWORD2
	bgelr+				/* Top bits agree: the difference is the result.  */
L(highbit):
	ori	rRTN, rWORD2, 1		/* Top bits differ: take the sign from s2's word.  */
	blr
#endif

/* Last whole word: finish the NUL and equality tests that the word loop
   skipped when CTR reached zero.  If the word is clean, step both
   pointers past it and fall into the byte loop for the residual bytes.  */
	.align 4
L(tail):
	and.	rTMP, rTMP, rNEG
	cmpw	cr1, rWORD1, rWORD2
	bne-	L(endstring)
	addi	rSTR1, rSTR1, 4
	bne-	cr1, L(different)
	addi	rSTR2, rSTR2, 4
	cmplwi	cr1, rN, 0
/* Byte-by-byte comparison of rN bytes.  cr1 holds the unsigned compare
   of rN against zero on every path that reaches this label.  The loop
   is unrolled four times, alternating between the rWORD1/rWORD2 and
   rWORD3/rWORD4 register pairs.  Each step exits when the byte budget
   in CTR runs out, when the s1 byte is NUL, or when the bytes differ;
   the result is then the difference of the current byte pair.  */
L(unaligned):
	mtctr   rN	/* Power4 wants mtctr 1st in dispatch group */
	ble	cr1, L(ux)		/* Nothing (left) to compare.  */
	lbz	rWORD1, 0(rSTR1)
	lbz	rWORD2, 0(rSTR2)
	.align 4
L(u1):
	cmpwi	cr1, rWORD1, 0
	bdz	L(u4)			/* That was the last permitted byte.  */
	cmpw	rWORD1, rWORD2
	beq-	cr1, L(u4)		/* NUL in s1.  */
	bne-	L(u4)			/* Bytes differ.  */
	lbzu    rWORD3, 1(rSTR1)
	lbzu	rWORD4, 1(rSTR2)
	cmpwi	cr1, rWORD3, 0
	bdz	L(u3)
	cmpw	rWORD3, rWORD4
	beq-    cr1, L(u3)
	bne-    L(u3)
	lbzu	rWORD1, 1(rSTR1)
	lbzu	rWORD2, 1(rSTR2)
	cmpwi	cr1, rWORD1, 0
	bdz	L(u4)
	cmpw	rWORD1, rWORD2
	beq-	cr1, L(u4)
	bne-	L(u4)
	lbzu	rWORD3, 1(rSTR1)
	lbzu	rWORD4, 1(rSTR2)
	cmpwi	cr1, rWORD3, 0
	bdz	L(u3)
	cmpw	rWORD3, rWORD4
	beq-    cr1, L(u3)
	bne-	L(u3)
	lbzu	rWORD1, 1(rSTR1)
	lbzu	rWORD2, 1(rSTR2)
	b       L(u1)

L(u3):  sub     rRTN, rWORD3, rWORD4	/* Exit with the rWORD3/rWORD4 pair current.  */
	blr
L(u4):	sub	rRTN, rWORD1, rWORD2	/* Exit with the rWORD1/rWORD2 pair current.  */
	blr
L(ux):
	li	rRTN, 0
	blr
END (strncmp)
libc_hidden_builtin_def (strncmp)
197