1/* Optimized with sse2 version of sincosf 2 Copyright (C) 2012-2021 Free Software Foundation, Inc. 3 This file is part of the GNU C Library. 4 5 The GNU C Library is free software; you can redistribute it and/or 6 modify it under the terms of the GNU Lesser General Public 7 License as published by the Free Software Foundation; either 8 version 2.1 of the License, or (at your option) any later version. 9 10 The GNU C Library is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 Lesser General Public License for more details. 14 15 You should have received a copy of the GNU Lesser General Public 16 License along with the GNU C Library; if not, see 17 <https://www.gnu.org/licenses/>. */ 18 19#include <sysdep.h> 20#include <errno.h> 21 22/* Short algorithm description: 23 * 24 * 1) if |x|==0: sin(x)=x, 25 * cos(x)=1. 26 * 2) if |x|<2^-27: sin(x)=x-x*DP_SMALL, raising underflow only when needed, 27 * cos(x)=1-|x|. 28 * 3) if |x|<2^-5 : sin(x)=x+x*x^2*DP_SIN2_0+x^5*DP_SIN2_1, 29 * cos(x)=1+1*x^2*DP_COS2_0+x^5*DP_COS2_1 30 * 4) if |x|< Pi/4: sin(x)=x+x*x^2*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))), 31 * cos(x)=1+1*x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))). 32 * 5) if |x| < 9*Pi/4: 33 * 5.1) Range reduction: 34 * k=trunc(|x|/(Pi/4)), j=(k+1)&0x0e, n=k+1, t=|x|-j*Pi/4. 35 * 5.2) Reconstruction: 36 * sign_sin = sign(x) * (-1.0)^(( n >>2)&1) 37 * sign_cos = (-1.0)^(((n+2)>>2)&1) 38 * poly_sin = ((((S4*t^2 + S3)*t^2 + S2)*t^2 + S1)*t^2 + S0)*t^2*t+t 39 * poly_cos = ((((C4*t^2 + C3)*t^2 + C2)*t^2 + C1)*t^2 + C0)*t^2*s+s 40 * if(n&2 != 0) { 41 * using cos(t) and sin(t) polynomials for |t|<Pi/4, results are 42 * cos(x) = poly_sin * sign_cos 43 * sin(x) = poly_cos * sign_sin 44 * } else { 45 * sin(x) = poly_sin * sign_sin 46 * cos(x) = poly_cos * sign_cos 47 * } 48 * 6) if |x| < 2^23, large args: 49 * 6.1) Range reduction: 50 * k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1, t=|x|-j*Pi/4 51 * 6.2) Reconstruction same as (5.2). 52 * 7) if |x| >= 2^23, very large args: 53 * 7.1) Range reduction: 54 * k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1, t=|x|-j*Pi/4. 55 * 7.2) Reconstruction same as (5.2). 56 * 8) if x is Inf, return x-x, and set errno=EDOM. 57 * 9) if x is NaN, return x-x. 58 * 59 * Special cases: 60 * sin/cos(+-0) = +-0/1 not raising inexact/underflow, 61 * sin/cos(subnormal) raises inexact/underflow, 62 * sin/cos(min_normalized) raises inexact/underflow, 63 * sin/cos(normalized) raises inexact, 64 * sin/cos(Inf) = NaN, raises invalid, sets errno to EDOM, 65 * sin/cos(NaN) = NaN. 66 */ 67 68#ifdef PIC 69# define MO1(symbol) L(symbol)##@GOTOFF(%ebx) 70# define MO2(symbol,reg2,_scale) L(symbol)##@GOTOFF(%ebx,reg2,_scale) 71# define CFI_PUSH(REG) cfi_adjust_cfa_offset(4); cfi_rel_offset(REG,0) 72# define CFI_POP(REG) cfi_adjust_cfa_offset(-4); cfi_restore(REG) 73# define PUSH(REG) pushl REG; CFI_PUSH(REG) 74# define POP(REG) popl REG; CFI_POP(REG) 75# define ENTRANCE PUSH(%ebx); LOAD_PIC_REG(bx) 76# define RETURN POP(%ebx); ret; CFI_PUSH(%ebx) 77# define ARG_X 8(%esp) 78# define ARG_SIN_PTR 12(%esp) 79# define ARG_COS_PTR 16(%esp) 80#else 81# define MO1(symbol) L(symbol) 82# define MO2(symbol,reg2,_scale) L(symbol)(,reg2,_scale) 83# define ENTRANCE 84# define RETURN ret 85# define ARG_X 4(%esp) 86# define ARG_SIN_PTR 8(%esp) 87# define ARG_COS_PTR 12(%esp) 88#endif 89 90 .text 91ENTRY(__sincosf_sse2) 92 /* Input: single precision x on stack at address ARG_X */ 93 /* pointer to sin result on stack at address ARG_SIN_PTR */ 94 /* pointer to cos result on stack at address ARG_COS_PTR */ 95 96 ENTRANCE 97 movl ARG_X, %eax /* Bits of x */ 98 cvtss2sd ARG_X, %xmm0 /* DP x */ 99 andl $0x7fffffff, %eax /* |x| */ 100 101 cmpl $0x3f490fdb, %eax /* |x|<Pi/4 ? */ 102 jb L(arg_less_pio4) 103 104 /* Here if |x|>=Pi/4 */ 105 movd %eax, %xmm3 /* SP |x| */ 106 andpd MO1(DP_ABS_MASK),%xmm0 /* DP |x| */ 107 movss MO1(SP_INVPIO4), %xmm2 /* SP 1/(Pi/4) */ 108 109 cmpl $0x40e231d6, %eax /* |x|<9*Pi/4 ? */ 110 jae L(large_args) 111 112 /* Here if Pi/4<=|x|<9*Pi/4 */ 113 mulss %xmm3, %xmm2 /* SP |x|/(Pi/4) */ 114 movl ARG_X, %ecx /* Load x */ 115 cvttss2si %xmm2, %eax /* k, number of Pi/4 in x */ 116 shrl $29, %ecx /* (sign of x) << 2 */ 117 addl $1, %eax /* k+1 */ 118 movl $0x0e, %edx 119 andl %eax, %edx /* j = (k+1)&0x0e */ 120 subsd MO2(PIO4J,%edx,8), %xmm0/* t = |x| - j * Pi/4 */ 121 122L(reconstruction): 123 /* Input: %eax=n, %xmm0=t, %ecx=sign(x) */ 124 125 movaps %xmm0, %xmm4 /* t */ 126 movhpd MO1(DP_ONES), %xmm4 /* 1|t */ 127 mulsd %xmm0, %xmm0 /* y=t^2 */ 128 movl $2, %edx 129 unpcklpd %xmm0, %xmm0 /* y|y */ 130 addl %eax, %edx /* k+2 */ 131 movaps %xmm0, %xmm1 /* y|y */ 132 mulpd %xmm0, %xmm0 /* z=t^4|z=t^4 */ 133 134 movaps MO1(DP_SC4), %xmm2 /* S4 */ 135 mulpd %xmm0, %xmm2 /* z*S4 */ 136 movaps MO1(DP_SC3), %xmm3 /* S3 */ 137 mulpd %xmm0, %xmm3 /* z*S3 */ 138 xorl %eax, %ecx /* (sign_x ^ (k>>2))<<2 */ 139 addpd MO1(DP_SC2), %xmm2 /* S2+z*S4 */ 140 mulpd %xmm0, %xmm2 /* z*(S2+z*S4) */ 141 shrl $2, %edx /* (k+2)>>2 */ 142 addpd MO1(DP_SC1), %xmm3 /* S1+z*S3 */ 143 mulpd %xmm0, %xmm3 /* z*(S1+z*S3) */ 144 shrl $2, %ecx /* sign_x ^ k>>2 */ 145 addpd MO1(DP_SC0), %xmm2 /* S0+z*(S2+z*S4) */ 146 andl $1, %edx /* sign_cos = ((k+2)>>2)&1 */ 147 mulpd %xmm1, %xmm2 /* y*(S0+z*(S2+z*S4)) */ 148 andl $1, %ecx /* sign_sin = sign_x ^ ((k>>2)&1) */ 149 addpd %xmm2, %xmm3 /* y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */ 150 mulpd %xmm4, %xmm3 /*t*y*(S0+y*(S1+y*(S2+y*(S3+y*S4))))*/ 151 testl $2, %eax /* n&2 != 0 ? */ 152 addpd %xmm4, %xmm3 /*t+t*y*(S0+y*(S1+y*(S2+y*(S3+y*S4))*/ 153 jnz L(sin_result_sin_poly) 154 155/*L(sin_result_cos_poly):*/ 156 /* 157 * Here if 158 * cos(x) = poly_sin * sign_cos 159 * sin(x) = poly_cos * sign_sin 160 */ 161 movsd MO2(DP_ONES,%ecx,8), %xmm4/* 0|sign_sin */ 162 movhpd MO2(DP_ONES,%edx,8), %xmm4/* sign_cos|sign_sin */ 163 mulpd %xmm4, %xmm3 /* result_cos|result_sin */ 164 movl ARG_SIN_PTR, %eax 165 cvtpd2ps %xmm3, %xmm0 /* SP results */ 166 movl ARG_COS_PTR, %ecx 167 movss %xmm0, (%eax) /* store sin(x) from xmm0[0] */ 168 shufps $1, %xmm0, %xmm0 /* move cos(x) to xmm0[0] */ 169 movss %xmm0, (%ecx) /* store cos(x) */ 170 RETURN 171 172 .p2align 4 173L(sin_result_sin_poly): 174 /* 175 * Here if 176 * sin(x) = poly_sin * sign_sin 177 * cos(x) = poly_cos * sign_cos 178 */ 179 movsd MO2(DP_ONES,%edx,8), %xmm4/* 0|sign_cos */ 180 movhpd MO2(DP_ONES,%ecx,8), %xmm4/* sign_sin|sign_cos */ 181 mulpd %xmm4, %xmm3 /* result_sin|result_cos */ 182 movl ARG_SIN_PTR, %eax 183 cvtpd2ps %xmm3, %xmm0 /* SP results */ 184 movl ARG_COS_PTR, %ecx 185 movss %xmm0, (%ecx) /* store cos(x) from xmm0[0] */ 186 shufps $1, %xmm0, %xmm0 /* move sin(x) to xmm0[0] */ 187 movss %xmm0, (%eax) /* store sin(x) */ 188 RETURN 189 190 .p2align 4 191L(large_args): 192 /* Here if |x|>=9*Pi/4 */ 193 cmpl $0x7f800000, %eax /* x is Inf or NaN ? */ 194 jae L(arg_inf_or_nan) 195 196 /* Here if finite |x|>=9*Pi/4 */ 197 cmpl $0x4b000000, %eax /* |x|<2^23 ? */ 198 jae L(very_large_args) 199 200 /* Here if 9*Pi/4<=|x|<2^23 */ 201 movsd MO1(DP_INVPIO4), %xmm1 /* 1/(Pi/4) */ 202 mulsd %xmm0, %xmm1 /* |x|/(Pi/4) */ 203 cvttsd2si %xmm1, %eax /* k=trunc(|x|/(Pi/4)) */ 204 addl $1, %eax /* k+1 */ 205 movl %eax, %edx 206 andl $0xfffffffe, %edx /* j=(k+1)&0xfffffffe */ 207 cvtsi2sdl %edx, %xmm4 /* DP j */ 208 movl ARG_X, %ecx /* Load x */ 209 movsd MO1(DP_PIO4HI), %xmm2 /* -PIO4HI = high part of -Pi/4 */ 210 shrl $29, %ecx /* (sign of x) << 2 */ 211 mulsd %xmm4, %xmm2 /* -j*PIO4HI */ 212 movsd MO1(DP_PIO4LO), %xmm3 /* -PIO4LO = low part of -Pi/4 */ 213 addsd %xmm2, %xmm0 /* |x| - j*PIO4HI */ 214 mulsd %xmm3, %xmm4 /* j*PIO4LO */ 215 addsd %xmm4, %xmm0 /* t = |x| - j*PIO4HI - j*PIO4LO */ 216 jmp L(reconstruction) 217 218 .p2align 4 219L(very_large_args): 220 /* Here if finite |x|>=2^23 */ 221 222 /* bitpos = (ix>>23) - BIAS_32 + 59; */ 223 shrl $23, %eax /* eb = biased exponent of x */ 224 subl $68, %eax /* bitpos=eb-0x7f+59, where 0x7f */ 225 /*is exponent bias */ 226 movl $28, %ecx /* %cl=28 */ 227 movl %eax, %edx /* bitpos copy */ 228 229 /* j = bitpos/28; */ 230 div %cl /* j in register %al=%ax/%cl */ 231 movapd %xmm0, %xmm3 /* |x| */ 232 andl $0xff, %eax /* clear unneeded remainder from %ah*/ 233 234 imull $28, %eax, %ecx /* j*28 */ 235 movsd MO1(DP_HI_MASK), %xmm4 /* DP_HI_MASK */ 236 movapd %xmm0, %xmm5 /* |x| */ 237 mulsd -2*8+MO2(_FPI,%eax,8), %xmm3/* tmp3 = FPI[j-2]*|x| */ 238 movapd %xmm0, %xmm1 /* |x| */ 239 mulsd -1*8+MO2(_FPI,%eax,8), %xmm5/* tmp2 = FPI[j-1]*|x| */ 240 mulsd 0*8+MO2(_FPI,%eax,8), %xmm0/* tmp0 = FPI[j]*|x| */ 241 addl $19, %ecx /* j*28+19 */ 242 mulsd 1*8+MO2(_FPI,%eax,8), %xmm1/* tmp1 = FPI[j+1]*|x| */ 243 cmpl %ecx, %edx /* bitpos>=j*28+19 ? */ 244 jl L(very_large_skip1) 245 246 /* Here if bitpos>=j*28+19 */ 247 andpd %xmm3, %xmm4 /* HI(tmp3) */ 248 subsd %xmm4, %xmm3 /* tmp3 = tmp3 - HI(tmp3) */ 249L(very_large_skip1): 250 251 movsd MO1(DP_2POW52), %xmm6 252 movapd %xmm5, %xmm2 /* tmp2 copy */ 253 addsd %xmm3, %xmm5 /* tmp5 = tmp3 + tmp2 */ 254 movl $1, %edx 255 addsd %xmm5, %xmm6 /* tmp6 = tmp5 + 2^52 */ 256 movsd 8+MO1(DP_2POW52), %xmm4 257 movd %xmm6, %eax /* k = I64_LO(tmp6); */ 258 addsd %xmm6, %xmm4 /* tmp4 = tmp6 - 2^52 */ 259 movl ARG_X, %ecx /* Load x */ 260 comisd %xmm5, %xmm4 /* tmp4 > tmp5 ? */ 261 jbe L(very_large_skip2) 262 263 /* Here if tmp4 > tmp5 */ 264 subl $1, %eax /* k-- */ 265 addsd 8+MO1(DP_ONES), %xmm4 /* tmp4 -= 1.0 */ 266L(very_large_skip2): 267 268 andl %eax, %edx /* k&1 */ 269 subsd %xmm4, %xmm3 /* tmp3 -= tmp4 */ 270 addsd MO2(DP_ZERONE,%edx,8), %xmm3/* t = DP_ZERONE[k&1] + tmp3 */ 271 addsd %xmm2, %xmm3 /* t += tmp2 */ 272 shrl $29, %ecx /* (sign of x) << 2 */ 273 addsd %xmm3, %xmm0 /* t += tmp0 */ 274 addl $1, %eax /* n=k+1 */ 275 addsd %xmm1, %xmm0 /* t += tmp1 */ 276 mulsd MO1(DP_PIO4), %xmm0 /* t *= PI04 */ 277 278 jmp L(reconstruction) /* end of very_large_args peth */ 279 280 .p2align 4 281L(arg_less_pio4): 282 /* Here if |x|<Pi/4 */ 283 cmpl $0x3d000000, %eax /* |x|<2^-5 ? */ 284 jl L(arg_less_2pn5) 285 286 /* Here if 2^-5<=|x|<Pi/4 */ 287 movaps %xmm0, %xmm3 /* DP x */ 288 movhpd MO1(DP_ONES), %xmm3 /* DP 1|x */ 289 mulsd %xmm0, %xmm0 /* DP y=x^2 */ 290 unpcklpd %xmm0, %xmm0 /* DP y|y */ 291 movaps %xmm0, %xmm1 /* y|y */ 292 mulpd %xmm0, %xmm0 /* z=x^4|z=x^4 */ 293 294 movapd MO1(DP_SC4), %xmm4 /* S4 */ 295 mulpd %xmm0, %xmm4 /* z*S4 */ 296 movapd MO1(DP_SC3), %xmm5 /* S3 */ 297 mulpd %xmm0, %xmm5 /* z*S3 */ 298 addpd MO1(DP_SC2), %xmm4 /* S2+z*S4 */ 299 mulpd %xmm0, %xmm4 /* z*(S2+z*S4) */ 300 addpd MO1(DP_SC1), %xmm5 /* S1+z*S3 */ 301 mulpd %xmm0, %xmm5 /* z*(S1+z*S3) */ 302 addpd MO1(DP_SC0), %xmm4 /* S0+z*(S2+z*S4) */ 303 mulpd %xmm1, %xmm4 /* y*(S0+z*(S2+z*S4)) */ 304 mulpd %xmm3, %xmm5 /* x*z*(S1+z*S3) */ 305 mulpd %xmm3, %xmm4 /* x*y*(S0+z*(S2+z*S4)) */ 306 addpd %xmm5, %xmm4 /*x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4))))*/ 307 movl ARG_SIN_PTR, %eax 308 addpd %xmm4, %xmm3 /*x+x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4))*/ 309 movl ARG_COS_PTR, %ecx 310 cvtpd2ps %xmm3, %xmm0 /* SP results */ 311 movss %xmm0, (%eax) /* store sin(x) from xmm0[0] */ 312 shufps $1, %xmm0, %xmm0 /* move cos(x) to xmm0[0] */ 313 movss %xmm0, (%ecx) /* store cos(x) */ 314 RETURN 315 316 .p2align 4 317L(arg_less_2pn5): 318 /* Here if |x|<2^-5 */ 319 cmpl $0x32000000, %eax /* |x|<2^-27 ? */ 320 jl L(arg_less_2pn27) 321 322 /* Here if 2^-27<=|x|<2^-5 */ 323 movaps %xmm0, %xmm1 /* DP x */ 324 movhpd MO1(DP_ONES), %xmm1 /* DP 1|x */ 325 mulsd %xmm0, %xmm0 /* DP x^2 */ 326 unpcklpd %xmm0, %xmm0 /* DP x^2|x^2 */ 327 328 movaps MO1(DP_SINCOS2_1), %xmm3/* DP DP_SIN2_1 */ 329 mulpd %xmm0, %xmm3 /* DP x^2*DP_SIN2_1 */ 330 addpd MO1(DP_SINCOS2_0), %xmm3/* DP DP_SIN2_0+x^2*DP_SIN2_1 */ 331 mulpd %xmm0, %xmm3 /* DP x^2*DP_SIN2_0+x^4*DP_SIN2_1 */ 332 mulpd %xmm1, %xmm3 /* DP x^3*DP_SIN2_0+x^5*DP_SIN2_1 */ 333 addpd %xmm1, %xmm3 /* DP x+x^3*DP_SIN2_0+x^5*DP_SIN2_1 */ 334 movl ARG_SIN_PTR, %eax 335 cvtpd2ps %xmm3, %xmm0 /* SP results */ 336 movl ARG_COS_PTR, %ecx 337 movss %xmm0, (%eax) /* store sin(x) from xmm0[0] */ 338 shufps $1, %xmm0, %xmm0 /* move cos(x) to xmm0[0] */ 339 movss %xmm0, (%ecx) /* store cos(x) */ 340 RETURN 341 342 .p2align 4 343L(arg_less_2pn27): 344 movss ARG_X, %xmm7 /* SP x */ 345 cmpl $0, %eax /* x=0 ? */ 346 je L(arg_zero) /* in case x=0 return sin(+-0)==+-0 */ 347 /* Here if |x|<2^-27 */ 348 /* 349 * Special cases here: 350 * sin(subnormal) raises inexact/underflow 351 * sin(min_normalized) raises inexact/underflow 352 * sin(normalized) raises inexact 353 * cos(here)=1-|x| (raising inexact) 354 */ 355 movaps %xmm0, %xmm3 /* DP x */ 356 mulsd MO1(DP_SMALL), %xmm0 /* DP x*DP_SMALL */ 357 subsd %xmm0, %xmm3 /* DP sin result is x-x*DP_SMALL */ 358 andps MO1(SP_ABS_MASK), %xmm7 /* SP |x| */ 359 cvtsd2ss %xmm3, %xmm0 /* sin(x) */ 360 movl ARG_SIN_PTR, %eax 361 movss MO1(SP_ONE), %xmm1 /* SP 1.0 */ 362 movss %xmm0, (%eax) /* sin(x) store */ 363 movl ARG_COS_PTR, %ecx 364 subss %xmm7, %xmm1 /* cos(x) */ 365 movss %xmm1, (%ecx) /* cos(x) store */ 366 RETURN 367 368 .p2align 4 369L(arg_zero): 370 movss MO1(SP_ONE), %xmm0 /* 1.0 */ 371 movl ARG_SIN_PTR, %eax 372 movl ARG_COS_PTR, %ecx 373 movss %xmm7, (%eax) /* sin(+-0)==x */ 374 movss %xmm0, (%ecx) /* cos(+-0)==1 */ 375 RETURN 376 377 .p2align 4 378L(arg_inf_or_nan): 379 movss ARG_X, %xmm7 /* SP x */ 380 /* Here if |x| is Inf or NAN */ 381 jne L(skip_errno_setting) /* in case of x is NaN */ 382 383 /* Here if x is Inf. Set errno to EDOM. */ 384 call JUMPTARGET(__errno_location) 385 movl $EDOM, (%eax) 386 387 .p2align 4 388L(skip_errno_setting): 389 /* Here if |x| is Inf or NAN. Continued. */ 390 subss %xmm7, %xmm7 /* x-x, result is NaN */ 391 movl ARG_SIN_PTR, %eax 392 movl ARG_COS_PTR, %ecx 393 movss %xmm7, (%eax) 394 movss %xmm7, (%ecx) 395 RETURN 396END(__sincosf_sse2) 397 398 .section .rodata, "a" 399 .p2align 3 400L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */ 401 .long 0x00000000,0x00000000 402 .long 0x54442d18,0x3fe921fb 403 .long 0x54442d18,0x3ff921fb 404 .long 0x7f3321d2,0x4002d97c 405 .long 0x54442d18,0x400921fb 406 .long 0x2955385e,0x400f6a7a 407 .long 0x7f3321d2,0x4012d97c 408 .long 0xe9bba775,0x4015fdbb 409 .long 0x54442d18,0x401921fb 410 .long 0xbeccb2bb,0x401c463a 411 .long 0x2955385e,0x401f6a7a 412 .type L(PIO4J), @object 413 ASM_SIZE_DIRECTIVE(L(PIO4J)) 414 415 .p2align 3 416L(_FPI): /* 4/Pi broken into sum of positive DP values */ 417 .long 0x00000000,0x00000000 418 .long 0x6c000000,0x3ff45f30 419 .long 0x2a000000,0x3e3c9c88 420 .long 0xa8000000,0x3c54fe13 421 .long 0xd0000000,0x3aaf47d4 422 .long 0x6c000000,0x38fbb81b 423 .long 0xe0000000,0x3714acc9 424 .long 0x7c000000,0x3560e410 425 .long 0x56000000,0x33bca2c7 426 .long 0xac000000,0x31fbd778 427 .long 0xe0000000,0x300b7246 428 .long 0xe8000000,0x2e5d2126 429 .long 0x48000000,0x2c970032 430 .long 0xe8000000,0x2ad77504 431 .long 0xe0000000,0x290921cf 432 .long 0xb0000000,0x274deb1c 433 .long 0xe0000000,0x25829a73 434 .long 0xbe000000,0x23fd1046 435 .long 0x10000000,0x2224baed 436 .long 0x8e000000,0x20709d33 437 .long 0x80000000,0x1e535a2f 438 .long 0x64000000,0x1cef904e 439 .long 0x30000000,0x1b0d6398 440 .long 0x24000000,0x1964ce7d 441 .long 0x16000000,0x17b908bf 442 .type L(_FPI), @object 443 ASM_SIZE_DIRECTIVE(L(_FPI)) 444 445/* Coefficients of polynomials for */ 446/* sin(x)~=x+x*x^2*(DP_SIN2_0+x^2*DP_SIN2_1) in low DP part, */ 447/* cos(x)~=1+1*x^2*(DP_COS2_0+x^2*DP_COS2_1) in high DP part, */ 448/* for |x|<2^-5. */ 449 .p2align 4 450L(DP_SINCOS2_0): 451 .long 0x5543d49d,0xbfc55555 452 .long 0xff5cc6fd,0xbfdfffff 453 .type L(DP_SINCOS2_0), @object 454 ASM_SIZE_DIRECTIVE(L(DP_SINCOS2_0)) 455 456 .p2align 4 457L(DP_SINCOS2_1): 458 .long 0x75cec8c5,0x3f8110f4 459 .long 0xb178dac5,0x3fa55514 460 .type L(DP_SINCOS2_1), @object 461 ASM_SIZE_DIRECTIVE(L(DP_SINCOS2_1)) 462 463 .p2align 3 464L(DP_ZERONE): 465 .long 0x00000000,0x00000000 /* 0.0 */ 466 .long 0x00000000,0xbff00000 /* 1.0 */ 467 .type L(DP_ZERONE), @object 468 ASM_SIZE_DIRECTIVE(L(DP_ZERONE)) 469 470 .p2align 3 471L(DP_ONES): 472 .long 0x00000000,0x3ff00000 /* +1.0 */ 473 .long 0x00000000,0xbff00000 /* -1.0 */ 474 .type L(DP_ONES), @object 475 ASM_SIZE_DIRECTIVE(L(DP_ONES)) 476 477/* Coefficients of polynomials for */ 478/* sin(t)~=t+t*t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))) in low DP part, */ 479/* cos(t)~=1+1*t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))) in high DP part, */ 480/* for |t|<Pi/4. */ 481 .p2align 4 482L(DP_SC4): 483 .long 0x1674b58a,0xbe5a947e 484 .long 0xdd8844d7,0xbe923c97 485 .type L(DP_SC4), @object 486 ASM_SIZE_DIRECTIVE(L(DP_SC4)) 487 488 .p2align 4 489L(DP_SC3): 490 .long 0x64e6b5b4,0x3ec71d72 491 .long 0x9ac43cc0,0x3efa00eb 492 .type L(DP_SC3), @object 493 ASM_SIZE_DIRECTIVE(L(DP_SC3)) 494 495 .p2align 4 496L(DP_SC2): 497 .long 0x8b4bd1f9,0xbf2a019f 498 .long 0x348b6874,0xbf56c16b 499 .type L(DP_SC2), @object 500 ASM_SIZE_DIRECTIVE(L(DP_SC2)) 501 502 .p2align 4 503L(DP_SC1): 504 .long 0x10c2688b,0x3f811111 505 .long 0x545c50c7,0x3fa55555 506 .type L(DP_SC1), @object 507 ASM_SIZE_DIRECTIVE(L(DP_SC1)) 508 509 .p2align 4 510L(DP_SC0): 511 .long 0x55551cd9,0xbfc55555 512 .long 0xfffe98ae,0xbfdfffff 513 .type L(DP_SC0), @object 514 ASM_SIZE_DIRECTIVE(L(DP_SC0)) 515 516 .p2align 3 517L(DP_SMALL): 518 .long 0x00000000,0x3cd00000 /* 2^(-50) */ 519 .type L(DP_SMALL), @object 520 ASM_SIZE_DIRECTIVE(L(DP_SMALL)) 521 522 .p2align 3 523L(DP_PIO4): 524 .long 0x54442d18,0x3fe921fb /* Pi/4 */ 525 .type L(DP_PIO4), @object 526 ASM_SIZE_DIRECTIVE(L(DP_PIO4)) 527 528 .p2align 3 529L(DP_2POW52): 530 .long 0x00000000,0x43300000 /* +2^52 */ 531 .long 0x00000000,0xc3300000 /* -2^52 */ 532 .type L(DP_2POW52), @object 533 ASM_SIZE_DIRECTIVE(L(DP_2POW52)) 534 535 .p2align 3 536L(DP_INVPIO4): 537 .long 0x6dc9c883,0x3ff45f30 /* 4/Pi */ 538 .type L(DP_INVPIO4), @object 539 ASM_SIZE_DIRECTIVE(L(DP_INVPIO4)) 540 541 .p2align 3 542L(DP_PIO4HI): 543 .long 0x54000000,0xbfe921fb /* High part of Pi/4 */ 544 .type L(DP_PIO4HI), @object 545 ASM_SIZE_DIRECTIVE(L(DP_PIO4HI)) 546 547 .p2align 3 548L(DP_PIO4LO): 549 .long 0x11A62633,0xbe010b46 /* Low part of Pi/4 */ 550 .type L(DP_PIO4LO), @object 551 ASM_SIZE_DIRECTIVE(L(DP_PIO4LO)) 552 553 .p2align 2 554L(SP_INVPIO4): 555 .long 0x3fa2f983 /* 4/Pi */ 556 .type L(SP_INVPIO4), @object 557 ASM_SIZE_DIRECTIVE(L(SP_INVPIO4)) 558 559 .p2align 4 560L(DP_ABS_MASK): /* Mask for getting DP absolute value */ 561 .long 0xffffffff,0x7fffffff 562 .long 0xffffffff,0x7fffffff 563 .type L(DP_ABS_MASK), @object 564 ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK)) 565 566 .p2align 3 567L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */ 568 .long 0x00000000,0xffffffff 569 .type L(DP_HI_MASK), @object 570 ASM_SIZE_DIRECTIVE(L(DP_HI_MASK)) 571 572 .p2align 4 573L(SP_ABS_MASK): /* Mask for getting SP absolute value */ 574 .long 0x7fffffff,0x7fffffff 575 .long 0x7fffffff,0x7fffffff 576 .type L(SP_ABS_MASK), @object 577 ASM_SIZE_DIRECTIVE(L(SP_ABS_MASK)) 578 579 .p2align 2 580L(SP_ONE): 581 .long 0x3f800000 /* 1.0 */ 582 .type L(SP_ONE), @object 583 ASM_SIZE_DIRECTIVE(L(SP_ONE)) 584 585weak_alias(__sincosf, sincosf) 586