1/* Optimized with sse2 version of sincosf
2   Copyright (C) 2012-2021 Free Software Foundation, Inc.
3   This file is part of the GNU C Library.
4
5   The GNU C Library is free software; you can redistribute it and/or
6   modify it under the terms of the GNU Lesser General Public
7   License as published by the Free Software Foundation; either
8   version 2.1 of the License, or (at your option) any later version.
9
10   The GNU C Library is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13   Lesser General Public License for more details.
14
15   You should have received a copy of the GNU Lesser General Public
16   License along with the GNU C Library; if not, see
17   <https://www.gnu.org/licenses/>.  */
18
19#include <sysdep.h>
20#include <errno.h>
21
22/* Short algorithm description:
23 *
24 *  1) if |x|==0:    sin(x)=x,
25 *                   cos(x)=1.
26 *  2) if |x|<2^-27: sin(x)=x-x*DP_SMALL, raising underflow only when needed,
27 *                   cos(x)=1-|x|.
28 *  3) if |x|<2^-5 : sin(x)=x+x*x^2*DP_SIN2_0+x^5*DP_SIN2_1,
29 *                   cos(x)=1+1*x^2*DP_COS2_0+x^5*DP_COS2_1
30 *  4) if |x|< Pi/4: sin(x)=x+x*x^2*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))),
31 *                   cos(x)=1+1*x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))).
32 *  5) if |x| < 9*Pi/4:
33 *      5.1) Range reduction:
34 *          k=trunc(|x|/(Pi/4)), j=(k+1)&0x0e, n=k+1, t=|x|-j*Pi/4.
35 *      5.2) Reconstruction:
36 *          sign_sin = sign(x) * (-1.0)^(( n   >>2)&1)
37 *          sign_cos =           (-1.0)^(((n+2)>>2)&1)
38 *          poly_sin = ((((S4*t^2 + S3)*t^2 + S2)*t^2 + S1)*t^2 + S0)*t^2*t+t
39 *          poly_cos = ((((C4*t^2 + C3)*t^2 + C2)*t^2 + C1)*t^2 + C0)*t^2*s+s
40 *          if(n&2 != 0) {
41 *              using cos(t) and sin(t) polynomials for |t|<Pi/4, results are
42 *              cos(x) = poly_sin * sign_cos
43 *              sin(x) = poly_cos * sign_sin
44 *          } else {
45 *              sin(x) = poly_sin * sign_sin
46 *              cos(x) = poly_cos * sign_cos
47 *          }
48 *  6) if |x| < 2^23, large args:
49 *      6.1) Range reduction:
50 *          k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1, t=|x|-j*Pi/4
51 *      6.2) Reconstruction same as (5.2).
52 *  7) if |x| >= 2^23, very large args:
53 *      7.1) Range reduction:
54 *          k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1, t=|x|-j*Pi/4.
55 *      7.2) Reconstruction same as (5.2).
56 *  8) if x is Inf, return x-x, and set errno=EDOM.
57 *  9) if x is NaN, return x-x.
58 *
59 * Special cases:
60 *  sin/cos(+-0) = +-0/1 not raising inexact/underflow,
61 *  sin/cos(subnormal) raises inexact/underflow,
62 *  sin/cos(min_normalized) raises inexact/underflow,
63 *  sin/cos(normalized) raises inexact,
64 *  sin/cos(Inf) = NaN, raises invalid, sets errno to EDOM,
65 *  sin/cos(NaN) = NaN.
66 */
67
68#ifdef	PIC
69# define MO1(symbol)			L(symbol)##@GOTOFF(%ebx)
70# define MO2(symbol,reg2,_scale)	L(symbol)##@GOTOFF(%ebx,reg2,_scale)
71# define CFI_PUSH(REG)	cfi_adjust_cfa_offset(4); cfi_rel_offset(REG,0)
72# define CFI_POP(REG)	cfi_adjust_cfa_offset(-4); cfi_restore(REG)
73# define PUSH(REG)			pushl REG; CFI_PUSH(REG)
74# define POP(REG)			popl REG; CFI_POP(REG)
75# define ENTRANCE			PUSH(%ebx); LOAD_PIC_REG(bx)
76# define RETURN				POP(%ebx); ret; CFI_PUSH(%ebx)
77# define ARG_X				8(%esp)
78# define ARG_SIN_PTR			12(%esp)
79# define ARG_COS_PTR			16(%esp)
80#else
81# define MO1(symbol)			L(symbol)
82# define MO2(symbol,reg2,_scale)	L(symbol)(,reg2,_scale)
83# define ENTRANCE
84# define RETURN				ret
85# define ARG_X				4(%esp)
86# define ARG_SIN_PTR			8(%esp)
87# define ARG_COS_PTR			12(%esp)
88#endif
89
90	.text
91ENTRY(__sincosf_sse2)
92	/* Input: single precision x on stack at address ARG_X */
93	/*        pointer to sin result on stack at address ARG_SIN_PTR */
94	/*        pointer to cos result on stack at address ARG_COS_PTR */
95
96	ENTRANCE
97	movl	ARG_X, %eax		/* Bits of x */
98	cvtss2sd ARG_X, %xmm0		/* DP x */
99	andl	$0x7fffffff, %eax	/* |x| */
100
101	cmpl	$0x3f490fdb, %eax	/* |x|<Pi/4 ? */
102	jb	L(arg_less_pio4)
103
104	/* Here if |x|>=Pi/4 */
105	movd	%eax, %xmm3		/* SP |x| */
106	andpd	MO1(DP_ABS_MASK),%xmm0	/* DP |x| */
107	movss	MO1(SP_INVPIO4), %xmm2	/* SP 1/(Pi/4) */
108
109	cmpl	$0x40e231d6, %eax	/* |x|<9*Pi/4 ? */
110	jae	L(large_args)
111
112	/* Here if Pi/4<=|x|<9*Pi/4 */
113	mulss	%xmm3, %xmm2		/* SP |x|/(Pi/4) */
114	movl	ARG_X, %ecx		/* Load x */
115	cvttss2si %xmm2, %eax		/* k, number of Pi/4 in x */
116	shrl	$29, %ecx		/* (sign of x) << 2 */
117	addl	$1, %eax		/* k+1 */
118	movl	$0x0e, %edx
119	andl	%eax, %edx		/* j = (k+1)&0x0e */
120	subsd	MO2(PIO4J,%edx,8), %xmm0/* t = |x| - j * Pi/4 */
121
122L(reconstruction):
123	/* Input: %eax=n, %xmm0=t, %ecx=sign(x) */
124
125	movaps	%xmm0, %xmm4		/* t */
126	movhpd	MO1(DP_ONES), %xmm4	/* 1|t */
127	mulsd	%xmm0, %xmm0		/* y=t^2 */
128	movl	$2, %edx
129	unpcklpd %xmm0, %xmm0		/* y|y */
130	addl	%eax, %edx		/* k+2 */
131	movaps	%xmm0, %xmm1		/* y|y */
132	mulpd	%xmm0, %xmm0		/* z=t^4|z=t^4 */
133
134	movaps	MO1(DP_SC4), %xmm2	/* S4 */
135	mulpd	%xmm0, %xmm2		/* z*S4 */
136	movaps	MO1(DP_SC3), %xmm3	/* S3 */
137	mulpd	%xmm0, %xmm3		/* z*S3 */
138	xorl	%eax, %ecx		/* (sign_x ^ (k>>2))<<2 */
139	addpd	MO1(DP_SC2), %xmm2	/* S2+z*S4 */
140	mulpd	%xmm0, %xmm2		/* z*(S2+z*S4) */
141	shrl	$2, %edx		/* (k+2)>>2 */
142	addpd	MO1(DP_SC1), %xmm3	/* S1+z*S3 */
143	mulpd	%xmm0, %xmm3		/* z*(S1+z*S3) */
144	shrl	$2, %ecx		/* sign_x ^ k>>2 */
145	addpd	MO1(DP_SC0), %xmm2	/* S0+z*(S2+z*S4) */
146	andl	$1, %edx		/* sign_cos = ((k+2)>>2)&1 */
147	mulpd	%xmm1, %xmm2		/* y*(S0+z*(S2+z*S4)) */
148	andl	$1, %ecx		/* sign_sin = sign_x ^ ((k>>2)&1) */
149	addpd	%xmm2, %xmm3		/* y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
150	mulpd	%xmm4, %xmm3		/*t*y*(S0+y*(S1+y*(S2+y*(S3+y*S4))))*/
151	testl	$2, %eax		/* n&2 != 0 ? */
152	addpd	%xmm4, %xmm3		/*t+t*y*(S0+y*(S1+y*(S2+y*(S3+y*S4))*/
153	jnz	L(sin_result_sin_poly)
154
155/*L(sin_result_cos_poly):*/
156	/*
157	 * Here if
158	 * cos(x) = poly_sin * sign_cos
159	 * sin(x) = poly_cos * sign_sin
160	 */
161	movsd	MO2(DP_ONES,%ecx,8), %xmm4/* 0|sign_sin */
162	movhpd	MO2(DP_ONES,%edx,8), %xmm4/* sign_cos|sign_sin */
163	mulpd	%xmm4, %xmm3		/* result_cos|result_sin */
164	movl	ARG_SIN_PTR, %eax
165	cvtpd2ps %xmm3, %xmm0		/* SP results */
166	movl	ARG_COS_PTR, %ecx
167	movss	%xmm0, (%eax)		/* store sin(x) from xmm0[0] */
168	shufps	$1, %xmm0, %xmm0	/* move cos(x) to xmm0[0] */
169	movss	%xmm0, (%ecx)		/* store cos(x) */
170	RETURN
171
172	.p2align	4
173L(sin_result_sin_poly):
174	/*
175	 * Here if
176	 * sin(x) = poly_sin * sign_sin
177	 * cos(x) = poly_cos * sign_cos
178	 */
179	movsd	MO2(DP_ONES,%edx,8), %xmm4/* 0|sign_cos */
180	movhpd	MO2(DP_ONES,%ecx,8), %xmm4/* sign_sin|sign_cos */
181	mulpd	%xmm4, %xmm3		/* result_sin|result_cos */
182	movl	ARG_SIN_PTR, %eax
183	cvtpd2ps %xmm3, %xmm0		/* SP results */
184	movl	ARG_COS_PTR, %ecx
185	movss	%xmm0, (%ecx)		/* store cos(x) from xmm0[0] */
186	shufps	$1, %xmm0, %xmm0	/* move sin(x) to xmm0[0] */
187	movss	%xmm0, (%eax)		/* store sin(x) */
188	RETURN
189
190	.p2align	4
191L(large_args):
192	/* Here if |x|>=9*Pi/4 */
193	cmpl	$0x7f800000, %eax	/* x is Inf or NaN ? */
194	jae	L(arg_inf_or_nan)
195
196	/* Here if finite |x|>=9*Pi/4 */
197	cmpl	$0x4b000000, %eax	/* |x|<2^23 ? */
198	jae	L(very_large_args)
199
200	/* Here if 9*Pi/4<=|x|<2^23 */
201	movsd	MO1(DP_INVPIO4), %xmm1	/* 1/(Pi/4) */
202	mulsd	%xmm0, %xmm1		/* |x|/(Pi/4) */
203	cvttsd2si %xmm1, %eax		/* k=trunc(|x|/(Pi/4)) */
204	addl	$1, %eax		/* k+1 */
205	movl	%eax, %edx
206	andl	$0xfffffffe, %edx	/* j=(k+1)&0xfffffffe */
207	cvtsi2sdl %edx, %xmm4		/* DP j */
208	movl	ARG_X, %ecx		/* Load x */
209	movsd	MO1(DP_PIO4HI), %xmm2	/* -PIO4HI = high part of -Pi/4 */
210	shrl	$29, %ecx		/* (sign of x) << 2 */
211	mulsd	%xmm4, %xmm2		/* -j*PIO4HI */
212	movsd	MO1(DP_PIO4LO), %xmm3	/* -PIO4LO = low part of -Pi/4 */
213	addsd	%xmm2, %xmm0		/* |x| - j*PIO4HI */
214	mulsd	%xmm3, %xmm4		/* j*PIO4LO */
215	addsd	%xmm4, %xmm0		/* t = |x| - j*PIO4HI - j*PIO4LO */
216	jmp	L(reconstruction)
217
218	.p2align	4
219L(very_large_args):
220	/* Here if finite |x|>=2^23 */
221
222	/* bitpos = (ix>>23) - BIAS_32 + 59; */
223	shrl	$23, %eax		/* eb = biased exponent of x */
224	subl	$68, %eax		/* bitpos=eb-0x7f+59, where 0x7f */
225							/*is exponent bias */
226	movl	$28, %ecx		/* %cl=28 */
227	movl	%eax, %edx		/* bitpos copy */
228
229	/* j = bitpos/28; */
230	div	%cl			/* j in register %al=%ax/%cl */
231	movapd	%xmm0, %xmm3		/* |x| */
232	andl	$0xff, %eax		/* clear unneeded remainder from %ah*/
233
234	imull	$28, %eax, %ecx		/* j*28 */
235	movsd	MO1(DP_HI_MASK), %xmm4	/* DP_HI_MASK */
236	movapd	%xmm0, %xmm5		/* |x| */
237	mulsd	-2*8+MO2(_FPI,%eax,8), %xmm3/* tmp3 = FPI[j-2]*|x| */
238	movapd	%xmm0, %xmm1		/* |x| */
239	mulsd	-1*8+MO2(_FPI,%eax,8), %xmm5/* tmp2 = FPI[j-1]*|x| */
240	mulsd	0*8+MO2(_FPI,%eax,8), %xmm0/* tmp0 = FPI[j]*|x| */
241	addl	$19, %ecx		/* j*28+19 */
242	mulsd	1*8+MO2(_FPI,%eax,8), %xmm1/* tmp1 = FPI[j+1]*|x| */
243	cmpl	%ecx, %edx		/* bitpos>=j*28+19 ? */
244	jl	L(very_large_skip1)
245
246	/* Here if bitpos>=j*28+19 */
247	andpd	%xmm3, %xmm4		/* HI(tmp3) */
248	subsd	%xmm4, %xmm3		/* tmp3 = tmp3 - HI(tmp3) */
249L(very_large_skip1):
250
251	movsd	MO1(DP_2POW52), %xmm6
252	movapd	%xmm5, %xmm2		/* tmp2 copy */
253	addsd	%xmm3, %xmm5		/* tmp5 = tmp3 + tmp2 */
254	movl	$1, %edx
255	addsd	%xmm5, %xmm6		/* tmp6 = tmp5 + 2^52 */
256	movsd	8+MO1(DP_2POW52), %xmm4
257	movd	%xmm6, %eax		/* k = I64_LO(tmp6); */
258	addsd	%xmm6, %xmm4		/* tmp4 = tmp6 - 2^52 */
259	movl	ARG_X, %ecx		/* Load x */
260	comisd	%xmm5, %xmm4		/* tmp4 > tmp5 ? */
261	jbe	L(very_large_skip2)
262
263	/* Here if tmp4 > tmp5 */
264	subl	$1, %eax		/* k-- */
265	addsd	8+MO1(DP_ONES), %xmm4	/* tmp4 -= 1.0 */
266L(very_large_skip2):
267
268	andl	%eax, %edx		/* k&1 */
269	subsd	%xmm4, %xmm3		/* tmp3 -= tmp4 */
270	addsd	MO2(DP_ZERONE,%edx,8), %xmm3/* t  = DP_ZERONE[k&1] + tmp3 */
271	addsd	%xmm2, %xmm3		/* t += tmp2 */
272	shrl	$29, %ecx		/* (sign of x) << 2 */
273	addsd	%xmm3, %xmm0		/* t += tmp0 */
274	addl	$1, %eax		/* n=k+1 */
275	addsd	%xmm1, %xmm0		/* t += tmp1 */
276	mulsd	MO1(DP_PIO4), %xmm0	/* t *= PI04 */
277
278	jmp	L(reconstruction)	/* end of very_large_args peth */
279
280	.p2align	4
281L(arg_less_pio4):
282	/* Here if |x|<Pi/4 */
283	cmpl	$0x3d000000, %eax	/* |x|<2^-5 ? */
284	jl	L(arg_less_2pn5)
285
286	/* Here if 2^-5<=|x|<Pi/4 */
287	movaps	%xmm0, %xmm3		/* DP x */
288	movhpd	MO1(DP_ONES), %xmm3	/* DP 1|x */
289	mulsd	%xmm0, %xmm0		/* DP y=x^2 */
290	unpcklpd %xmm0, %xmm0		/* DP y|y */
291	movaps	%xmm0, %xmm1		/* y|y */
292	mulpd	%xmm0, %xmm0		/* z=x^4|z=x^4 */
293
294	movapd	MO1(DP_SC4), %xmm4	/* S4 */
295	mulpd	%xmm0, %xmm4		/* z*S4 */
296	movapd	MO1(DP_SC3), %xmm5	/* S3 */
297	mulpd	%xmm0, %xmm5		/* z*S3 */
298	addpd	MO1(DP_SC2), %xmm4	/* S2+z*S4 */
299	mulpd	%xmm0, %xmm4		/* z*(S2+z*S4) */
300	addpd	MO1(DP_SC1), %xmm5	/* S1+z*S3 */
301	mulpd	%xmm0, %xmm5		/* z*(S1+z*S3) */
302	addpd	MO1(DP_SC0), %xmm4	/* S0+z*(S2+z*S4) */
303	mulpd	%xmm1, %xmm4		/* y*(S0+z*(S2+z*S4)) */
304	mulpd	%xmm3, %xmm5		/* x*z*(S1+z*S3) */
305	mulpd	%xmm3, %xmm4		/* x*y*(S0+z*(S2+z*S4)) */
306	addpd	%xmm5, %xmm4		/*x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4))))*/
307	movl	ARG_SIN_PTR, %eax
308	addpd	%xmm4, %xmm3		/*x+x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4))*/
309	movl	ARG_COS_PTR, %ecx
310	cvtpd2ps %xmm3, %xmm0		/* SP results */
311	movss	%xmm0, (%eax)		/* store sin(x) from xmm0[0] */
312	shufps	$1, %xmm0, %xmm0	/* move cos(x) to xmm0[0] */
313	movss	%xmm0, (%ecx)		/* store cos(x) */
314	RETURN
315
316	.p2align	4
317L(arg_less_2pn5):
318	/* Here if |x|<2^-5 */
319	cmpl	$0x32000000, %eax	/* |x|<2^-27 ? */
320	jl	L(arg_less_2pn27)
321
322	/* Here if 2^-27<=|x|<2^-5 */
323	movaps	%xmm0, %xmm1		/* DP x */
324	movhpd	MO1(DP_ONES), %xmm1	/* DP 1|x */
325	mulsd	%xmm0, %xmm0		/* DP x^2 */
326	unpcklpd %xmm0, %xmm0		/* DP x^2|x^2 */
327
328	movaps	MO1(DP_SINCOS2_1), %xmm3/* DP DP_SIN2_1 */
329	mulpd	%xmm0, %xmm3		/* DP x^2*DP_SIN2_1 */
330	addpd	MO1(DP_SINCOS2_0), %xmm3/* DP DP_SIN2_0+x^2*DP_SIN2_1 */
331	mulpd	%xmm0, %xmm3		/* DP x^2*DP_SIN2_0+x^4*DP_SIN2_1 */
332	mulpd	%xmm1, %xmm3		/* DP x^3*DP_SIN2_0+x^5*DP_SIN2_1 */
333	addpd	%xmm1, %xmm3		/* DP x+x^3*DP_SIN2_0+x^5*DP_SIN2_1 */
334	movl	ARG_SIN_PTR, %eax
335	cvtpd2ps %xmm3, %xmm0		/* SP results */
336	movl	ARG_COS_PTR, %ecx
337	movss	%xmm0, (%eax)		/* store sin(x) from xmm0[0] */
338	shufps	$1, %xmm0, %xmm0	/* move cos(x) to xmm0[0] */
339	movss	%xmm0, (%ecx)		/* store cos(x) */
340	RETURN
341
342	.p2align	4
343L(arg_less_2pn27):
344	movss	ARG_X, %xmm7		/* SP x */
345	cmpl	$0, %eax		/* x=0 ? */
346	je	L(arg_zero)		/* in case x=0 return sin(+-0)==+-0 */
347	/* Here if |x|<2^-27 */
348	/*
349	 * Special cases here:
350	 *  sin(subnormal) raises inexact/underflow
351	 *  sin(min_normalized) raises inexact/underflow
352	 *  sin(normalized) raises inexact
353	 *  cos(here)=1-|x| (raising inexact)
354	 */
355	movaps	%xmm0, %xmm3		/* DP x */
356	mulsd	MO1(DP_SMALL), %xmm0	/* DP x*DP_SMALL */
357	subsd	%xmm0, %xmm3		/* DP sin result is x-x*DP_SMALL */
358	andps	MO1(SP_ABS_MASK), %xmm7	/* SP |x| */
359	cvtsd2ss %xmm3, %xmm0		/* sin(x) */
360	movl	ARG_SIN_PTR, %eax
361	movss	MO1(SP_ONE), %xmm1	/* SP 1.0 */
362	movss	%xmm0, (%eax)		/* sin(x) store */
363	movl	ARG_COS_PTR, %ecx
364	subss	%xmm7, %xmm1		/* cos(x) */
365	movss	%xmm1, (%ecx)		/* cos(x) store */
366	RETURN
367
368	.p2align	4
369L(arg_zero):
370	movss	MO1(SP_ONE), %xmm0	/* 1.0 */
371	movl	ARG_SIN_PTR, %eax
372	movl	ARG_COS_PTR, %ecx
373	movss	%xmm7, (%eax)		/* sin(+-0)==x */
374	movss	%xmm0, (%ecx)		/* cos(+-0)==1 */
375	RETURN
376
377	.p2align	4
378L(arg_inf_or_nan):
379	movss	ARG_X, %xmm7		/* SP x */
380	/* Here if |x| is Inf or NAN */
381	jne	L(skip_errno_setting)	/* in case of x is NaN */
382
383	/* Here if x is Inf. Set errno to EDOM.  */
384	call	JUMPTARGET(__errno_location)
385	movl	$EDOM, (%eax)
386
387	.p2align	4
388L(skip_errno_setting):
389	/* Here if |x| is Inf or NAN. Continued. */
390	subss	%xmm7, %xmm7		/* x-x, result is NaN */
391	movl	ARG_SIN_PTR, %eax
392	movl	ARG_COS_PTR, %ecx
393	movss	%xmm7, (%eax)
394	movss	%xmm7, (%ecx)
395	RETURN
396END(__sincosf_sse2)
397
398	.section .rodata, "a"
399	.p2align 3
400L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
401	.long	0x00000000,0x00000000
402	.long	0x54442d18,0x3fe921fb
403	.long	0x54442d18,0x3ff921fb
404	.long	0x7f3321d2,0x4002d97c
405	.long	0x54442d18,0x400921fb
406	.long	0x2955385e,0x400f6a7a
407	.long	0x7f3321d2,0x4012d97c
408	.long	0xe9bba775,0x4015fdbb
409	.long	0x54442d18,0x401921fb
410	.long	0xbeccb2bb,0x401c463a
411	.long	0x2955385e,0x401f6a7a
412	.type L(PIO4J), @object
413	ASM_SIZE_DIRECTIVE(L(PIO4J))
414
415	.p2align 3
416L(_FPI): /* 4/Pi broken into sum of positive DP values */
417	.long	0x00000000,0x00000000
418	.long	0x6c000000,0x3ff45f30
419	.long	0x2a000000,0x3e3c9c88
420	.long	0xa8000000,0x3c54fe13
421	.long	0xd0000000,0x3aaf47d4
422	.long	0x6c000000,0x38fbb81b
423	.long	0xe0000000,0x3714acc9
424	.long	0x7c000000,0x3560e410
425	.long	0x56000000,0x33bca2c7
426	.long	0xac000000,0x31fbd778
427	.long	0xe0000000,0x300b7246
428	.long	0xe8000000,0x2e5d2126
429	.long	0x48000000,0x2c970032
430	.long	0xe8000000,0x2ad77504
431	.long	0xe0000000,0x290921cf
432	.long	0xb0000000,0x274deb1c
433	.long	0xe0000000,0x25829a73
434	.long	0xbe000000,0x23fd1046
435	.long	0x10000000,0x2224baed
436	.long	0x8e000000,0x20709d33
437	.long	0x80000000,0x1e535a2f
438	.long	0x64000000,0x1cef904e
439	.long	0x30000000,0x1b0d6398
440	.long	0x24000000,0x1964ce7d
441	.long	0x16000000,0x17b908bf
442	.type L(_FPI), @object
443	ASM_SIZE_DIRECTIVE(L(_FPI))
444
445/* Coefficients of polynomials for */
446/* sin(x)~=x+x*x^2*(DP_SIN2_0+x^2*DP_SIN2_1) in low  DP part, */
447/* cos(x)~=1+1*x^2*(DP_COS2_0+x^2*DP_COS2_1) in high DP part, */
448/* for |x|<2^-5. */
449	.p2align 4
450L(DP_SINCOS2_0):
451	.long	0x5543d49d,0xbfc55555
452	.long	0xff5cc6fd,0xbfdfffff
453	.type L(DP_SINCOS2_0), @object
454	ASM_SIZE_DIRECTIVE(L(DP_SINCOS2_0))
455
456	.p2align 4
457L(DP_SINCOS2_1):
458	.long	0x75cec8c5,0x3f8110f4
459	.long	0xb178dac5,0x3fa55514
460	.type L(DP_SINCOS2_1), @object
461	ASM_SIZE_DIRECTIVE(L(DP_SINCOS2_1))
462
463	.p2align 3
464L(DP_ZERONE):
465	.long	0x00000000,0x00000000	/* 0.0 */
466	.long	0x00000000,0xbff00000	/* 1.0 */
467	.type L(DP_ZERONE), @object
468	ASM_SIZE_DIRECTIVE(L(DP_ZERONE))
469
470	.p2align 3
471L(DP_ONES):
472	.long	0x00000000,0x3ff00000	/* +1.0 */
473	.long	0x00000000,0xbff00000	/* -1.0 */
474	.type L(DP_ONES), @object
475	ASM_SIZE_DIRECTIVE(L(DP_ONES))
476
477/* Coefficients of polynomials for */
478/* sin(t)~=t+t*t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))) in low  DP part, */
479/* cos(t)~=1+1*t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))) in high DP part, */
480/* for |t|<Pi/4. */
481	.p2align 4
482L(DP_SC4):
483	.long	0x1674b58a,0xbe5a947e
484	.long	0xdd8844d7,0xbe923c97
485	.type L(DP_SC4), @object
486	ASM_SIZE_DIRECTIVE(L(DP_SC4))
487
488	.p2align 4
489L(DP_SC3):
490	.long	0x64e6b5b4,0x3ec71d72
491	.long	0x9ac43cc0,0x3efa00eb
492	.type L(DP_SC3), @object
493	ASM_SIZE_DIRECTIVE(L(DP_SC3))
494
495	.p2align 4
496L(DP_SC2):
497	.long	0x8b4bd1f9,0xbf2a019f
498	.long	0x348b6874,0xbf56c16b
499	.type L(DP_SC2), @object
500	ASM_SIZE_DIRECTIVE(L(DP_SC2))
501
502	.p2align 4
503L(DP_SC1):
504	.long	0x10c2688b,0x3f811111
505	.long	0x545c50c7,0x3fa55555
506	.type L(DP_SC1), @object
507	ASM_SIZE_DIRECTIVE(L(DP_SC1))
508
509	.p2align 4
510L(DP_SC0):
511	.long	0x55551cd9,0xbfc55555
512	.long	0xfffe98ae,0xbfdfffff
513	.type L(DP_SC0), @object
514	ASM_SIZE_DIRECTIVE(L(DP_SC0))
515
516	.p2align 3
517L(DP_SMALL):
518	.long	0x00000000,0x3cd00000	/* 2^(-50) */
519	.type L(DP_SMALL), @object
520	ASM_SIZE_DIRECTIVE(L(DP_SMALL))
521
522	.p2align 3
523L(DP_PIO4):
524	.long	0x54442d18,0x3fe921fb	/* Pi/4 */
525	.type L(DP_PIO4), @object
526	ASM_SIZE_DIRECTIVE(L(DP_PIO4))
527
528	.p2align 3
529L(DP_2POW52):
530	.long	0x00000000,0x43300000	/* +2^52 */
531	.long	0x00000000,0xc3300000	/* -2^52 */
532	.type L(DP_2POW52), @object
533	ASM_SIZE_DIRECTIVE(L(DP_2POW52))
534
535	.p2align 3
536L(DP_INVPIO4):
537	.long	0x6dc9c883,0x3ff45f30	/* 4/Pi */
538	.type L(DP_INVPIO4), @object
539	ASM_SIZE_DIRECTIVE(L(DP_INVPIO4))
540
541	.p2align 3
542L(DP_PIO4HI):
543	.long	0x54000000,0xbfe921fb	/* High part of Pi/4 */
544	.type L(DP_PIO4HI), @object
545	ASM_SIZE_DIRECTIVE(L(DP_PIO4HI))
546
547	.p2align 3
548L(DP_PIO4LO):
549	.long	0x11A62633,0xbe010b46	/* Low part of Pi/4 */
550	.type L(DP_PIO4LO), @object
551	ASM_SIZE_DIRECTIVE(L(DP_PIO4LO))
552
553	.p2align 2
554L(SP_INVPIO4):
555	.long	0x3fa2f983		/* 4/Pi */
556	.type L(SP_INVPIO4), @object
557	ASM_SIZE_DIRECTIVE(L(SP_INVPIO4))
558
559	.p2align 4
560L(DP_ABS_MASK): /* Mask for getting DP absolute value */
561	.long	0xffffffff,0x7fffffff
562	.long	0xffffffff,0x7fffffff
563	.type L(DP_ABS_MASK), @object
564	ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
565
566	.p2align 3
567L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
568	.long	0x00000000,0xffffffff
569	.type L(DP_HI_MASK), @object
570	ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
571
572	.p2align 4
573L(SP_ABS_MASK): /* Mask for getting SP absolute value */
574	.long	0x7fffffff,0x7fffffff
575	.long	0x7fffffff,0x7fffffff
576	.type L(SP_ABS_MASK), @object
577	ASM_SIZE_DIRECTIVE(L(SP_ABS_MASK))
578
579	.p2align 2
580L(SP_ONE):
581	.long	0x3f800000		/* 1.0 */
582	.type L(SP_ONE), @object
583	ASM_SIZE_DIRECTIVE(L(SP_ONE))
584
585weak_alias(__sincosf, sincosf)
586