! SPARC __mpn_add_n -- Add two limb vectors of the same length > 0 and store
! sum in a third limb vector.
!
! Copyright (C) 1995-2021 Free Software Foundation, Inc.
!
! This file is part of the GNU MP Library.
!
! The GNU MP Library is free software; you can redistribute it and/or modify
! it under the terms of the GNU Lesser General Public License as published by
! the Free Software Foundation; either version 2.1 of the License, or (at your
! option) any later version.
!
! The GNU MP Library is distributed in the hope that it will be useful, but
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
! License for more details.
!
! You should have received a copy of the GNU Lesser General Public License
! along with the GNU MP Library; see the file COPYING.LIB.  If not,
! see <https://www.gnu.org/licenses/>.


! INPUT PARAMETERS
#define RES_PTR	%o0
#define S1_PTR	%o1
#define S2_PTR	%o2
#define SIZE	%o3

#include <sysdep.h>

/* mp_limb_t __mpn_add_n (mp_ptr res_ptr, mp_srcptr s1_ptr,
			  mp_srcptr s2_ptr, mp_size_t size)

   In:	RES_PTR (%o0)  destination limb vector
	S1_PTR  (%o1)  first addend limb vector
	S2_PTR  (%o2)  second addend limb vector
	SIZE    (%o3)  number of 32-bit limbs, > 0
   Out:	%o0 = carry out of the most significant limb (0 or 1)
   Clobbers %g1-%g4, %o4, %o5 and the integer condition codes.

   The unrolled loops use doubleword memory operations (ldd/std), which
   require 8-byte-aligned addresses, so one of three variants is chosen
   at run time from the pointers' mutual word alignment:
     V1a: RES_PTR and S2_PTR can be brought to common 8-byte alignment;
     V1b: RES_PTR and S1_PTR can -- swap S1_PTR/S2_PTR and reuse V1a;
     V2:  S1_PTR and S2_PTR are mutually aligned but RES_PTR differs.

   The running carry lives in the icc C flag.  Updating the loop counter
   (addcc on SIZE) clobbers the flags, so the carry is first parked in a
   register with "addx %g0,%g0,%o4" (%o4 := C) and later re-created with
   "subcc %g0,%o4,%g0" (0 - %o4 sets C exactly when %o4 != 0).  The
   instruction after each branch sits in the branch's delay slot and is
   executed whether or not the branch is taken.  */
ENTRY(__mpn_add_n)
	xor	S2_PTR,RES_PTR,%g1	! bit 2 set iff RES/S2 word alignment differs
	andcc	%g1,4,%g0
	bne	LOC(1)			! branch if alignment differs
	 nop
! **  V1a  **  RES_PTR and S2_PTR share word alignment.
LOC(0):	andcc	RES_PTR,4,%g0		! RES_PTR unaligned? Side effect: cy=0
	be	LOC(v1)			! if no, branch
	 nop
/* Add least significant limb separately to align RES_PTR and S2_PTR */
	ld	[S1_PTR],%g4
	add	S1_PTR,4,S1_PTR
	ld	[S2_PTR],%g2
	add	S2_PTR,4,S2_PTR
	add	SIZE,-1,SIZE
	addcc	%g4,%g2,%o4		! C now holds the carry into the next limb
	st	%o4,[RES_PTR]
	add	RES_PTR,4,RES_PTR
LOC(v1):
	addx	%g0,%g0,%o4		! save cy in register
	cmp	SIZE,2			! if SIZE < 2 ...
	bl	LOC(end2)		! ... branch to tail code (shared with V2)
	subcc	%g0,%o4,%g0		! restore cy (delay slot, both paths)

/* Software pipeline: the current S1 pair is kept in %g4/%g1 and the
   current S2 pair in %g2/%g3 (ldd fills the even/odd register pair).
   SIZE is biased by -10 so "bge" below means "8 or more limbs left".  */
	ld	[S1_PTR+0],%g4
	addcc	SIZE,-10,SIZE
	ld	[S1_PTR+4],%g1
	ldd	[S2_PTR+0],%g2		! loads the pair %g2/%g3
	blt	LOC(fin1)
	subcc	%g0,%o4,%g0		! restore cy
/* Add blocks of 8 limbs until less than 8 limbs remain */
LOC(loop1):
	addxcc	%g4,%g2,%o4
	ld	[S1_PTR+8],%g4		! refill pipeline while the adder works
	addxcc	%g1,%g3,%o5
	ld	[S1_PTR+12],%g1
	ldd	[S2_PTR+8],%g2
	std	%o4,[RES_PTR+0]		! stores the pair %o4/%o5
	addxcc	%g4,%g2,%o4
	ld	[S1_PTR+16],%g4
	addxcc	%g1,%g3,%o5
	ld	[S1_PTR+20],%g1
	ldd	[S2_PTR+16],%g2
	std	%o4,[RES_PTR+8]
	addxcc	%g4,%g2,%o4
	ld	[S1_PTR+24],%g4
	addxcc	%g1,%g3,%o5
	ld	[S1_PTR+28],%g1
	ldd	[S2_PTR+24],%g2
	std	%o4,[RES_PTR+16]
	addxcc	%g4,%g2,%o4
	ld	[S1_PTR+32],%g4
	addxcc	%g1,%g3,%o5
	ld	[S1_PTR+36],%g1
	ldd	[S2_PTR+32],%g2
	std	%o4,[RES_PTR+24]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-8,SIZE		! clobbers flags; cy is safe in %o4
	add	S1_PTR,32,S1_PTR
	add	S2_PTR,32,S2_PTR
	add	RES_PTR,32,RES_PTR
	bge	LOC(loop1)
	subcc	%g0,%o4,%g0		! restore cy

LOC(fin1):
	addcc	SIZE,8-2,SIZE		! unbias: SIZE = limbs left beyond the
					! pair already in registers, minus 2
	blt	LOC(end1)
	subcc	%g0,%o4,%g0		! restore cy
/* Add blocks of 2 limbs until less than 2 limbs remain */
LOC(loope1):
	addxcc	%g4,%g2,%o4
	ld	[S1_PTR+8],%g4		! prefetch the next pair
	addxcc	%g1,%g3,%o5
	ld	[S1_PTR+12],%g1
	ldd	[S2_PTR+8],%g2
	std	%o4,[RES_PTR+0]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-2,SIZE
	add	S1_PTR,8,S1_PTR
	add	S2_PTR,8,S2_PTR
	add	RES_PTR,8,RES_PTR
	bge	LOC(loope1)
	subcc	%g0,%o4,%g0		! restore cy
LOC(end1):
/* Add the pair still held in %g4/%g1 and %g2/%g3 */
	addxcc	%g4,%g2,%o4
	addxcc	%g1,%g3,%o5
	std	%o4,[RES_PTR+0]
	addx	%g0,%g0,%o4		! save cy in register

	andcc	SIZE,1,%g0		! one odd limb left over?
	be	LOC(ret1)
	subcc	%g0,%o4,%g0		! restore cy (delay slot, both paths)
/* Add last limb */
	ld	[S1_PTR+8],%g4
	ld	[S2_PTR+8],%g2
	addxcc	%g4,%g2,%o4
	st	%o4,[RES_PTR+8]

LOC(ret1):
	retl
	addx	%g0,%g0,%o0	! return carry-out from most sign. limb

LOC(1):	xor	S1_PTR,RES_PTR,%g1	! do RES and S1 share word alignment?
	andcc	%g1,4,%g0
	bne	LOC(2)
	nop
! **  V1b  **  RES_PTR and S1_PTR share word alignment: addition is
! commutative, so exchange S1_PTR and S2_PTR and fall back to V1a.
	mov	S2_PTR,%g1
	mov	S1_PTR,S2_PTR
	b	LOC(0)
	mov	%g1,S1_PTR		! delay slot: completes the swap

! **  V2  **
/* If we come here, the alignment of S1_PTR and RES_PTR as well as the
   alignment of S2_PTR and RES_PTR differ.  Since there are only two ways
   things can be aligned (that we care about) we now know that the alignment
   of S1_PTR and S2_PTR are the same.  */

LOC(2):	cmp	SIZE,1
	be	LOC(jone)		! single limb; cmp left C clear (1-1, no borrow)
	nop
	andcc	S1_PTR,4,%g0		! S1_PTR unaligned? Side effect: cy=0
	be	LOC(v2)			! if no, branch
	nop
/* Add least significant limb separately to align S1_PTR and S2_PTR */
	ld	[S1_PTR],%g4
	add	S1_PTR,4,S1_PTR
	ld	[S2_PTR],%g2
	add	S2_PTR,4,S2_PTR
	add	SIZE,-1,SIZE
	addcc	%g4,%g2,%o4		! C now holds the carry into the next limb
	st	%o4,[RES_PTR]
	add	RES_PTR,4,RES_PTR

LOC(v2):
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-8,SIZE		! "bge" below means "8 or more limbs left"
	blt	LOC(fin2)
	subcc	%g0,%o4,%g0		! restore cy
/* Add blocks of 8 limbs until less than 8 limbs remain.
   Sources are read with 8-byte ldd into register pairs (%g2/%g3 and
   %o4/%o5); RES_PTR is only word-aligned here, so each pair is written
   back with two single-word st's.  */
LOC(loop2):
	ldd	[S1_PTR+0],%g2
	ldd	[S2_PTR+0],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[RES_PTR+0]
	addxcc	%g3,%o5,%g3
	st	%g3,[RES_PTR+4]
	ldd	[S1_PTR+8],%g2
	ldd	[S2_PTR+8],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[RES_PTR+8]
	addxcc	%g3,%o5,%g3
	st	%g3,[RES_PTR+12]
	ldd	[S1_PTR+16],%g2
	ldd	[S2_PTR+16],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[RES_PTR+16]
	addxcc	%g3,%o5,%g3
	st	%g3,[RES_PTR+20]
	ldd	[S1_PTR+24],%g2
	ldd	[S2_PTR+24],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[RES_PTR+24]
	addxcc	%g3,%o5,%g3
	st	%g3,[RES_PTR+28]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-8,SIZE		! clobbers flags; cy is safe in %o4
	add	S1_PTR,32,S1_PTR
	add	S2_PTR,32,S2_PTR
	add	RES_PTR,32,RES_PTR
	bge	LOC(loop2)
	subcc	%g0,%o4,%g0		! restore cy

LOC(fin2):
	addcc	SIZE,8-2,SIZE		! unbias: SIZE = remaining limbs - 2
	blt	LOC(end2)
	subcc	%g0,%o4,%g0		! restore cy
/* Add blocks of 2 limbs until less than 2 limbs remain */
LOC(loope2):
	ldd	[S1_PTR+0],%g2
	ldd	[S2_PTR+0],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[RES_PTR+0]
	addxcc	%g3,%o5,%g3
	st	%g3,[RES_PTR+4]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-2,SIZE
	add	S1_PTR,8,S1_PTR
	add	S2_PTR,8,S2_PTR
	add	RES_PTR,8,RES_PTR
	bge	LOC(loope2)
	subcc	%g0,%o4,%g0		! restore cy
LOC(end2):				! also the tail for the V1 SIZE<2 case
	andcc	SIZE,1,%g0		! one odd limb left over?
	be	LOC(ret2)
	subcc	%g0,%o4,%g0		! restore cy (delay slot, both paths)
/* Add last limb */
LOC(jone):
	ld	[S1_PTR],%g4
	ld	[S2_PTR],%g2
	addxcc	%g4,%g2,%o4
	st	%o4,[RES_PTR]

LOC(ret2):
	retl
	addx	%g0,%g0,%o0	! return carry-out from most sign. limb

END(__mpn_add_n)
238