1 // SPDX-License-Identifier: BSD-3-Clause
2
3 /*============================================================================
4
5 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
6 Package, Release 3a, by John R. Hauser.
7
8 Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
9 All rights reserved.
10
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 1. Redistributions of source code must retain the above copyright notice,
15 this list of conditions, and the following disclaimer.
16
17 2. Redistributions in binary form must reproduce the above copyright notice,
18 this list of conditions, and the following disclaimer in the documentation
19 and/or other materials provided with the distribution.
20
21 3. Neither the name of the University nor the names of its contributors may
22 be used to endorse or promote products derived from this software without
23 specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
26 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
27 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
28 DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
29 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
30 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
31 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
32 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
33 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
34 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35
36 =============================================================================*/
37
38 #include <stdbool.h>
39 #include <stdint.h>
40 #include "platform.h"
41 #include "internals.h"
42 #include "specialize.h"
43 #include "softfloat.h"
44
f128_sqrt(float128_t a)45 float128_t f128_sqrt( float128_t a )
46 {
47 union ui128_f128 uA;
48 uint_fast64_t uiA64, uiA0;
49 bool signA;
50 int_fast32_t expA;
51 struct uint128 sigA, uiZ;
52 struct exp32_sig128 normExpSig;
53 int_fast32_t expZ;
54 uint_fast32_t sig32A, recipSqrt32, sig32Z;
55 struct uint128 rem;
56 uint32_t qs[3];
57 uint_fast32_t q;
58 uint_fast64_t x64, sig64Z;
59 struct uint128 term, y;
60 uint_fast64_t sigZExtra;
61 struct uint128 sigZ;
62 union ui128_f128 uZ;
63
64 /*------------------------------------------------------------------------
65 *------------------------------------------------------------------------*/
66 uA.f = a;
67 uiA64 = uA.ui.v64;
68 uiA0 = uA.ui.v0;
69 signA = signF128UI64( uiA64 );
70 expA = expF128UI64( uiA64 );
71 sigA.v64 = fracF128UI64( uiA64 );
72 sigA.v0 = uiA0;
73 /*------------------------------------------------------------------------
74 *------------------------------------------------------------------------*/
75 if ( expA == 0x7FFF ) {
76 if ( sigA.v64 | sigA.v0 ) {
77 uiZ = softfloat_propagateNaNF128UI( uiA64, uiA0, 0, 0 );
78 goto uiZ;
79 }
80 if ( ! signA ) return a;
81 goto invalid;
82 }
83 /*------------------------------------------------------------------------
84 *------------------------------------------------------------------------*/
85 if ( signA ) {
86 if ( ! (expA | sigA.v64 | sigA.v0) ) return a;
87 goto invalid;
88 }
89 /*------------------------------------------------------------------------
90 *------------------------------------------------------------------------*/
91 if ( ! expA ) {
92 if ( ! (sigA.v64 | sigA.v0) ) return a;
93 normExpSig = softfloat_normSubnormalF128Sig( sigA.v64, sigA.v0 );
94 expA = normExpSig.exp;
95 sigA = normExpSig.sig;
96 }
97 /*------------------------------------------------------------------------
98 | (`sig32Z' is guaranteed to be a lower bound on the square root of
99 | `sig32A', which makes `sig32Z' also a lower bound on the square root of
100 | `sigA'.)
101 *------------------------------------------------------------------------*/
102 expZ = ((expA - 0x3FFF)>>1) + 0x3FFE;
103 expA &= 1;
104 sigA.v64 |= UINT64_C( 0x0001000000000000 );
105 sig32A = sigA.v64>>17;
106 recipSqrt32 = softfloat_approxRecipSqrt32_1( expA, sig32A );
107 sig32Z = ((uint_fast64_t) sig32A * recipSqrt32)>>32;
108 if ( expA ) {
109 sig32Z >>= 1;
110 rem = softfloat_shortShiftLeft128( sigA.v64, sigA.v0, 12 );
111 } else {
112 rem = softfloat_shortShiftLeft128( sigA.v64, sigA.v0, 13 );
113 }
114 qs[2] = sig32Z;
115 rem.v64 -= (uint_fast64_t) sig32Z * sig32Z;
116 /*------------------------------------------------------------------------
117 *------------------------------------------------------------------------*/
118 q = ((uint_fast64_t) (uint32_t) (rem.v64>>2) * recipSqrt32)>>32;
119 qs[1] = q;
120 x64 = (uint_fast64_t) sig32Z<<32;
121 sig64Z = x64 + ((uint_fast64_t) q<<3);
122 x64 += sig64Z;
123 rem = softfloat_shortShiftLeft128( rem.v64, rem.v0, 29 );
124 term = softfloat_mul64ByShifted32To128( x64, q );
125 rem = softfloat_sub128( rem.v64, rem.v0, term.v64, term.v0 );
126 /*------------------------------------------------------------------------
127 *------------------------------------------------------------------------*/
128 q = ((uint_fast64_t) (uint32_t) (rem.v64>>2) * recipSqrt32)>>32;
129 y = softfloat_shortShiftLeft128( rem.v64, rem.v0, 29 );
130 sig64Z <<= 1;
131 /*------------------------------------------------------------------------
132 | (Repeating this loop is a rare occurrence.)
133 *------------------------------------------------------------------------*/
134 for (;;) {
135 term = softfloat_shortShiftLeft128( 0, sig64Z, 32 );
136 term = softfloat_add128( term.v64, term.v0, 0, (uint_fast64_t) q<<6 );
137 term = softfloat_mul128By32( term.v64, term.v0, q );
138 rem = softfloat_sub128( y.v64, y.v0, term.v64, term.v0 );
139 if ( ! (rem.v64 & UINT64_C( 0x8000000000000000 )) ) break;
140 --q;
141 }
142 qs[0] = q;
143 /*------------------------------------------------------------------------
144 *------------------------------------------------------------------------*/
145 q = (((uint_fast64_t) (uint32_t) (rem.v64>>2) * recipSqrt32)>>32) + 2;
146 sigZExtra = (uint64_t) ((uint_fast64_t) q<<59);
147 term = softfloat_shortShiftLeft128( 0, qs[1], 53 );
148 sigZ =
149 softfloat_add128(
150 (uint_fast64_t) qs[2]<<18, ((uint_fast64_t) qs[0]<<24) + (q>>5),
151 term.v64, term.v0
152 );
153 /*------------------------------------------------------------------------
154 *------------------------------------------------------------------------*/
155 if ( (q & 0xF) <= 2 ) {
156 q &= ~3;
157 sigZExtra = (uint64_t) ((uint_fast64_t) q<<59);
158 y = softfloat_shortShiftLeft128( sigZ.v64, sigZ.v0, 6 );
159 y.v0 |= sigZExtra>>58;
160 term = softfloat_sub128( y.v64, y.v0, 0, q );
161 y = softfloat_mul64ByShifted32To128( term.v0, q );
162 term = softfloat_mul64ByShifted32To128( term.v64, q );
163 term = softfloat_add128( term.v64, term.v0, 0, y.v64 );
164 rem = softfloat_shortShiftLeft128( rem.v64, rem.v0, 20 );
165 term = softfloat_sub128( term.v64, term.v0, rem.v64, rem.v0 );
166 /*--------------------------------------------------------------------
167 | The concatenation of `term' and `y.v0' is now the negative remainder
168 | (3 words altogether).
169 *--------------------------------------------------------------------*/
170 if ( term.v64 & UINT64_C( 0x8000000000000000 ) ) {
171 sigZExtra |= 1;
172 } else {
173 if ( term.v64 | term.v0 | y.v0 ) {
174 if ( sigZExtra ) {
175 --sigZExtra;
176 } else {
177 sigZ = softfloat_sub128( sigZ.v64, sigZ.v0, 0, 1 );
178 sigZExtra = ~0;
179 }
180 }
181 }
182 }
183 return softfloat_roundPackToF128( 0, expZ, sigZ.v64, sigZ.v0, sigZExtra );
184 /*------------------------------------------------------------------------
185 *------------------------------------------------------------------------*/
186 invalid:
187 softfloat_raiseFlags( softfloat_flag_invalid );
188 uiZ.v64 = defaultNaNF128UI64;
189 uiZ.v0 = defaultNaNF128UI0;
190 uiZ:
191 uZ.ui = uiZ;
192 return uZ.f;
193
194 }
195
196