1! SPARC v8 __mpn_addmul_1 -- Multiply a limb vector with a limb and
2! add the result to a second limb vector.
3
4! Copyright (C) 1992-2021 Free Software Foundation, Inc.
5
6! This file is part of the GNU MP Library.
7
8! The GNU MP Library is free software; you can redistribute it and/or modify
9! it under the terms of the GNU Lesser General Public License as published by
10! the Free Software Foundation; either version 2.1 of the License, or (at your
11! option) any later version.
12
13! The GNU MP Library is distributed in the hope that it will be useful, but
14! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16! License for more details.
17
18! You should have received a copy of the GNU Lesser General Public License
19! along with the GNU MP Library; see the file COPYING.LIB.  If not,
20! see <https://www.gnu.org/licenses/>.
21
22
23! INPUT PARAMETERS
24! res_ptr	o0
25! s1_ptr	o1
26! size		o2
27! s2_limb	o3
28
29#include <sysdep.h>
30
/* __mpn_addmul_1: multiply the size-limb vector at s1_ptr by s2_limb and
   add the products, limb by limb, into the vector at res_ptr.

   Arguments (as listed under INPUT PARAMETERS at the top of the file):
     %o0  res_ptr   limbs that are read, added to, and written back
     %o1  s1_ptr    limbs that are multiplied
     %o2  size      limb count
     %o3  s2_limb   multiplier
   Result:
     %o0  carry limb left over after the last limb has been stored.

   Scratch registers: %o4 (current s1 limb), %g1 (table offset, later the
   loaded res limb), %g2 (carry limb), %g3 (table base, later the low
   product word), %g4 (saved %o7), plus %y and the integer condition codes.

   The main loop is unrolled four times.  Execution enters it at the
   section selected by size mod 4, so there is no separate clean-up loop.

   NOTE(review): size is presumably always >= 1.  A size of 0 selects
   LOC(00) and runs three loop sections plus the tail, touching four
   limbs -- confirm that callers never pass 0.  */
31ENTRY(__mpn_addmul_1)
	/* Preload the first s1 limb; every entry path below expects it in %o4.  */
32	ld	[%o1+0],%o4	! 1
33	sll	%o2,4,%g1	/* %g1 = size * 16 */
34	orcc	%g0,%g0,%g2	/* carry limb %g2 = 0; also clears the carry flag */
35	mov	%o7,%g4			! Save return address register
36	and	%g1,(4-1)<<4,%g1	/* keep (size mod 4) * 16: byte offset of the table entry */
	/* Compute the run-time address of the dispatch table relative to the
	   PC: call writes its own address (label 1) into %o7, and the delay
	   slot adds the distance from label 1 to label 3 (the table).  */
371:	call	2f
38	 add	%o7,3f-1b,%g3
	/* Jump to the selected table entry; the delay slot puts the caller's
	   return address back into %o7 for the final retl.  */
392:	jmp	%g3+%g1
40	 mov	%g4,%o7			! Restore return address register
41
	/* Dispatch table, indexed by size mod 4.  Every entry is padded with
	   nops to four instructions (16 bytes) to match the <<4 scaling above.
	   An entry biases %o0 and %o1 so that the fixed offsets used by the
	   loop section it enters address limb 0, then branches there; the
	   second pointer adjustment sits in the branch delay slot.  LOC(01)
	   needs no bias.  */
42	.align	4
433:
44LOC(00):
45	add	%o0,-4,%o0
46	b	LOC(loop00)		/* 4, 8, 12, ... */
47	 add	%o1,-4,%o1
48	nop
49LOC(01):
50	b	LOC(loop01)		/* 1, 5, 9, ... */
51	 nop
52	nop
53	nop
54LOC(10):
55	add	%o0,-12,%o0	/* 2, 6, 10, ... */
56	b	LOC(loop10)
57	 add	%o1,4,%o1
58	nop
59LOC(11):
60	add	%o0,-8,%o0	/* 3, 7, 11, ... */
61	b	LOC(loop11)
62	 add	%o1,-8,%o1
63	nop
64
/* Main loop.  Each of the four sections handles one limb:
     umul          %g3 = low word of (s1 limb * s2_limb), %y = high word
     addcc/addxcc  %g3 += carry limb %g2 (addxcc also adds the carry flag
                   left by the previous section)
     rd + addx     %g2 = high word + carry flag: the next carry limb
     ld + addcc    %g3 += res limb; the resulting carry flag is consumed by
                   the next addxcc, or by the addx that ends LOC(loop10)
     st            write the limb back to res
   The load of the next s1 limb into %o4 is interleaved with these steps.
   The trailing "! n" notes appear to give the position (1..4) of the limb
   within the current group of four.  */
	/* Section 1.  Its umul was already issued in the delay slot of the
	   bg at LOC(loop01), and the carry flag was already folded into %g2,
	   so a plain addcc is used here.  */
65LOC(loop):
66	addcc	%g3,%g2,%g3	! 1
67	ld	[%o1+4],%o4	! 2
68	rd	%y,%g2		! 1
69	addx	%g0,%g2,%g2
70	ld	[%o0+0],%g1	! 2
71	addcc	%g1,%g3,%g3
72	st	%g3,[%o0+0]	! 1
	/* Section 2; entry point when size mod 4 == 0.  */
73LOC(loop00):
74	umul	%o4,%o3,%g3	! 2
75	ld	[%o0+4],%g1	! 2
76	addxcc	%g3,%g2,%g3	! 2
77	ld	[%o1+8],%o4	! 3
78	rd	%y,%g2		! 2
79	addx	%g0,%g2,%g2
80	nop	/* NOTE(review): purpose not evident from this file; presumably scheduling padding */
81	addcc	%g1,%g3,%g3
82	st	%g3,[%o0+4]	! 2
	/* Section 3; entry point when size mod 4 == 3.  s1_ptr is advanced by
	   four limbs here, after the last s1 load that uses the old base.  */
83LOC(loop11):
84	umul	%o4,%o3,%g3	! 3
85	addxcc	%g3,%g2,%g3	! 3
86	ld	[%o1+12],%o4	! 4
87	rd	%y,%g2		! 3
88	add	%o1,16,%o1
89	addx	%g0,%g2,%g2
90	ld	[%o0+8],%g1	! 2
91	addcc	%g1,%g3,%g3
92	st	%g3,[%o0+8]	! 3
	/* Section 4; entry point when size mod 4 == 2.  After the store,
	   res_ptr is advanced by four limbs and the pending carry flag is
	   folded into %g2, because the addcc on the counter at LOC(loop01)
	   overwrites the condition codes.  */
93LOC(loop10):
94	umul	%o4,%o3,%g3	! 4
95	addxcc	%g3,%g2,%g3	! 4
96	ld	[%o1+0],%o4	! 1
97	rd	%y,%g2		! 4
98	addx	%g0,%g2,%g2
99	ld	[%o0+12],%g1	! 2
100	addcc	%g1,%g3,%g3
101	st	%g3,[%o0+12]	! 4
102	add	%o0,16,%o0
103	addx	%g0,%g2,%g2
	/* Loop control; entry point when size mod 4 == 1.  size -= 4, and the
	   loop repeats while the result is still positive.  The umul in the
	   delay slot runs whether or not the branch is taken: it starts the
	   product consumed either by LOC(loop) or by the tail below.  */
104LOC(loop01):
105	addcc	%o2,-4,%o2
106	bg	LOC(loop)
107	 umul	%o4,%o3,%g3	! 1
108
	/* Tail: finish the last limb with the same steps as section 1, then
	   return.  The addx in the retl delay slot sets %o0 to the carry limb
	   plus the carry flag from the final addition.  */
109	addcc	%g3,%g2,%g3	! 4
110	rd	%y,%g2		! 4
111	addx	%g0,%g2,%g2
112	ld	[%o0+0],%g1	! 2
113	addcc	%g1,%g3,%g3
114	st	%g3,[%o0+0]	! 4
115	retl
116	 addx	%g0,%g2,%o0
117
118END(__mpn_addmul_1)
119