1 // SPDX-License-Identifier: BSD-3-Clause 2 3 /*============================================================================ 4 5 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic 6 Package, Release 3a, by John R. Hauser. 7 8 Copyright 2011, 2012, 2013, 2014 The Regents of the University of California. 9 All rights reserved. 10 11 Redistribution and use in source and binary forms, with or without 12 modification, are permitted provided that the following conditions are met: 13 14 1. Redistributions of source code must retain the above copyright notice, 15 this list of conditions, and the following disclaimer. 16 17 2. Redistributions in binary form must reproduce the above copyright notice, 18 this list of conditions, and the following disclaimer in the documentation 19 and/or other materials provided with the distribution. 20 21 3. Neither the name of the University nor the names of its contributors may 22 be used to endorse or promote products derived from this software without 23 specific prior written permission. 24 25 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY 26 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 27 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE 28 DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY 29 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 30 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 31 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 32 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 33 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 34 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 35 36 =============================================================================*/ 37 38 #include <stdbool.h> 39 #include <stdint.h> 40 #include "platform.h" 41 #include "internals.h" 42 #include "specialize.h" 43 #include "softfloat.h" 44 45 float128_t softfloat_mulAddF128(uint_fast64_t uiA64,uint_fast64_t uiA0,uint_fast64_t uiB64,uint_fast64_t uiB0,uint_fast64_t uiC64,uint_fast64_t uiC0,uint_fast8_t op)46 softfloat_mulAddF128( 47 uint_fast64_t uiA64, 48 uint_fast64_t uiA0, 49 uint_fast64_t uiB64, 50 uint_fast64_t uiB0, 51 uint_fast64_t uiC64, 52 uint_fast64_t uiC0, 53 uint_fast8_t op 54 ) 55 { 56 bool signA; 57 int_fast32_t expA; 58 struct uint128 sigA; 59 bool signB; 60 int_fast32_t expB; 61 struct uint128 sigB; 62 bool signC; 63 int_fast32_t expC; 64 struct uint128 sigC; 65 bool signZ; 66 uint_fast64_t magBits; 67 struct uint128 uiZ; 68 struct exp32_sig128 normExpSig; 69 int_fast32_t expZ; 70 uint64_t sig256Z[4]; 71 struct uint128 sigZ; 72 int_fast32_t shiftCount, expDiff; 73 struct uint128 x128; 74 uint64_t sig256C[4]; 75 static uint64_t zero256[4] = INIT_UINTM4( 0, 0, 0, 0 ); 76 uint_fast64_t sigZExtra, sig256Z0; 77 union ui128_f128 uZ; 78 79 /*------------------------------------------------------------------------ 80 *------------------------------------------------------------------------*/ 81 signA = signF128UI64( uiA64 ); 82 expA = expF128UI64( uiA64 ); 83 sigA.v64 = fracF128UI64( uiA64 ); 84 sigA.v0 = uiA0; 85 signB = signF128UI64( uiB64 ); 86 expB = expF128UI64( uiB64 ); 87 sigB.v64 = fracF128UI64( uiB64 ); 88 sigB.v0 = uiB0; 89 signC = signF128UI64( uiC64 ) ^ (op == softfloat_mulAdd_subC); 90 expC = expF128UI64( uiC64 ); 91 sigC.v64 = fracF128UI64( uiC64 ); 92 sigC.v0 = uiC0; 93 signZ = signA ^ signB ^ (op == softfloat_mulAdd_subProd); 94 /*------------------------------------------------------------------------ 95 *------------------------------------------------------------------------*/ 96 if ( expA == 0x7FFF ) { 97 if ( 98 (sigA.v64 | sigA.v0) || ((expB == 0x7FFF) && (sigB.v64 | sigB.v0)) 99 ) { 100 goto propagateNaN_ABC; 101 } 102 magBits = expB | sigB.v64 | sigB.v0; 103 goto infProdArg; 104 } 105 if ( expB == 0x7FFF ) { 106 if ( sigB.v64 | sigB.v0 ) goto propagateNaN_ABC; 107 magBits = expA | sigA.v64 | sigA.v0; 108 goto infProdArg; 109 } 110 if ( expC == 0x7FFF ) { 111 if ( sigC.v64 | sigC.v0 ) { 112 uiZ.v64 = 0; 113 uiZ.v0 = 0; 114 goto propagateNaN_ZC; 115 } 116 uiZ.v64 = uiC64; 117 uiZ.v0 = uiC0; 118 goto uiZ; 119 } 120 /*------------------------------------------------------------------------ 121 *------------------------------------------------------------------------*/ 122 if ( ! expA ) { 123 if ( ! (sigA.v64 | sigA.v0) ) goto zeroProd; 124 normExpSig = softfloat_normSubnormalF128Sig( sigA.v64, sigA.v0 ); 125 expA = normExpSig.exp; 126 sigA = normExpSig.sig; 127 } 128 if ( ! expB ) { 129 if ( ! (sigB.v64 | sigB.v0) ) goto zeroProd; 130 normExpSig = softfloat_normSubnormalF128Sig( sigB.v64, sigB.v0 ); 131 expB = normExpSig.exp; 132 sigB = normExpSig.sig; 133 } 134 /*------------------------------------------------------------------------ 135 *------------------------------------------------------------------------*/ 136 expZ = expA + expB - 0x3FFE; 137 sigA.v64 |= UINT64_C( 0x0001000000000000 ); 138 sigB.v64 |= UINT64_C( 0x0001000000000000 ); 139 sigA = softfloat_shortShiftLeft128( sigA.v64, sigA.v0, 8 ); 140 sigB = softfloat_shortShiftLeft128( sigB.v64, sigB.v0, 15 ); 141 softfloat_mul128To256M( sigA.v64, sigA.v0, sigB.v64, sigB.v0, sig256Z ); 142 sigZ.v64 = sig256Z[indexWord( 4, 3 )]; 143 sigZ.v0 = sig256Z[indexWord( 4, 2 )]; 144 shiftCount = 0; 145 if ( ! (sigZ.v64 & UINT64_C( 0x0100000000000000 )) ) { 146 --expZ; 147 shiftCount = -1; 148 } 149 if ( ! expC ) { 150 if ( ! (sigC.v64 | sigC.v0) ) { 151 shiftCount += 8; 152 goto sigZ; 153 } 154 normExpSig = softfloat_normSubnormalF128Sig( sigC.v64, sigC.v0 ); 155 expC = normExpSig.exp; 156 sigC = normExpSig.sig; 157 } 158 sigC.v64 |= UINT64_C( 0x0001000000000000 ); 159 sigC = softfloat_shortShiftLeft128( sigC.v64, sigC.v0, 8 ); 160 /*------------------------------------------------------------------------ 161 *------------------------------------------------------------------------*/ 162 expDiff = expZ - expC; 163 if ( expDiff < 0 ) { 164 expZ = expC; 165 if ( (signZ == signC) || (expDiff < -1) ) { 166 shiftCount -= expDiff; 167 if ( shiftCount ) { 168 sigZ = 169 softfloat_shiftRightJam128( 170 sigZ.v64, sigZ.v0, shiftCount ); 171 } 172 } else { 173 if ( ! shiftCount ) { 174 x128 = 175 softfloat_shortShiftRight128( 176 sig256Z[indexWord( 4, 1 )], sig256Z[indexWord( 4, 0 )], 177 1 178 ); 179 sig256Z[indexWord( 4, 1 )] = (sigZ.v0<<63) | x128.v64; 180 sig256Z[indexWord( 4, 0 )] = x128.v0; 181 sigZ = softfloat_shortShiftRight128( sigZ.v64, sigZ.v0, 1 ); 182 sig256Z[indexWord( 4, 3 )] = sigZ.v64; 183 sig256Z[indexWord( 4, 2 )] = sigZ.v0; 184 } 185 } 186 } else { 187 if ( shiftCount ) softfloat_add256M( sig256Z, sig256Z, sig256Z ); 188 if ( ! expDiff ) { 189 sigZ.v64 = sig256Z[indexWord( 4, 3 )]; 190 sigZ.v0 = sig256Z[indexWord( 4, 2 )]; 191 } else { 192 sig256C[indexWord( 4, 3 )] = sigC.v64; 193 sig256C[indexWord( 4, 2 )] = sigC.v0; 194 sig256C[indexWord( 4, 1 )] = 0; 195 sig256C[indexWord( 4, 0 )] = 0; 196 softfloat_shiftRightJam256M( sig256C, expDiff, sig256C ); 197 } 198 } 199 /*------------------------------------------------------------------------ 200 *------------------------------------------------------------------------*/ 201 shiftCount = 8; 202 if ( signZ == signC ) { 203 /*-------------------------------------------------------------------- 204 *--------------------------------------------------------------------*/ 205 if ( expDiff <= 0 ) { 206 sigZ = softfloat_add128( sigC.v64, sigC.v0, sigZ.v64, sigZ.v0 ); 207 } else { 208 softfloat_add256M( sig256Z, sig256C, sig256Z ); 209 sigZ.v64 = sig256Z[indexWord( 4, 3 )]; 210 sigZ.v0 = sig256Z[indexWord( 4, 2 )]; 211 } 212 if ( sigZ.v64 & UINT64_C( 0x0200000000000000 ) ) { 213 ++expZ; 214 shiftCount = 9; 215 } 216 } else { 217 /*-------------------------------------------------------------------- 218 *--------------------------------------------------------------------*/ 219 if ( expDiff < 0 ) { 220 signZ = signC; 221 if ( expDiff < -1 ) { 222 sigZ = 223 softfloat_sub128( sigC.v64, sigC.v0, sigZ.v64, sigZ.v0 ); 224 sigZExtra = 225 sig256Z[indexWord( 4, 1 )] | sig256Z[indexWord( 4, 0 )]; 226 if ( sigZExtra ) { 227 sigZ = softfloat_sub128( sigZ.v64, sigZ.v0, 0, 1 ); 228 } 229 if ( ! (sigZ.v64 & UINT64_C( 0x0100000000000000 )) ) { 230 --expZ; 231 shiftCount = 7; 232 } 233 goto shiftRightRoundPack; 234 } else { 235 sig256C[indexWord( 4, 3 )] = sigC.v64; 236 sig256C[indexWord( 4, 2 )] = sigC.v0; 237 sig256C[indexWord( 4, 1 )] = 0; 238 sig256C[indexWord( 4, 0 )] = 0; 239 softfloat_sub256M( sig256C, sig256Z, sig256Z ); 240 } 241 } else if ( ! expDiff ) { 242 sigZ = softfloat_sub128( sigZ.v64, sigZ.v0, sigC.v64, sigC.v0 ); 243 if ( 244 ! (sigZ.v64 | sigZ.v0) && ! sig256Z[indexWord( 4, 1 )] 245 && ! sig256Z[indexWord( 4, 0 )] 246 ) { 247 goto completeCancellation; 248 } 249 sig256Z[indexWord( 4, 3 )] = sigZ.v64; 250 sig256Z[indexWord( 4, 2 )] = sigZ.v0; 251 if ( sigZ.v64 & UINT64_C( 0x8000000000000000 ) ) { 252 signZ ^= 1; 253 softfloat_sub256M( zero256, sig256Z, sig256Z ); 254 } 255 } else { 256 softfloat_sub256M( sig256Z, sig256C, sig256Z ); 257 if ( 1 < expDiff ) { 258 sigZ.v64 = sig256Z[indexWord( 4, 3 )]; 259 sigZ.v0 = sig256Z[indexWord( 4, 2 )]; 260 if ( ! (sigZ.v64 & UINT64_C( 0x0100000000000000 )) ) { 261 --expZ; 262 shiftCount = 7; 263 } 264 goto sigZ; 265 } 266 } 267 /*-------------------------------------------------------------------- 268 *--------------------------------------------------------------------*/ 269 sigZ.v64 = sig256Z[indexWord( 4, 3 )]; 270 sigZ.v0 = sig256Z[indexWord( 4, 2 )]; 271 sigZExtra = sig256Z[indexWord( 4, 1 )]; 272 sig256Z0 = sig256Z[indexWord( 4, 0 )]; 273 if ( sigZ.v64 ) { 274 if ( sig256Z0 ) sigZExtra |= 1; 275 } else { 276 expZ -= 64; 277 sigZ.v64 = sigZ.v0; 278 sigZ.v0 = sigZExtra; 279 sigZExtra = sig256Z0; 280 if ( ! sigZ.v64 ) { 281 expZ -= 64; 282 sigZ.v64 = sigZ.v0; 283 sigZ.v0 = sigZExtra; 284 sigZExtra = 0; 285 if ( ! sigZ.v64 ) { 286 expZ -= 64; 287 sigZ.v64 = sigZ.v0; 288 sigZ.v0 = 0; 289 } 290 } 291 } 292 shiftCount = softfloat_countLeadingZeros64( sigZ.v64 ); 293 expZ += 7 - shiftCount; 294 shiftCount = 15 - shiftCount; 295 if ( 0 < shiftCount ) goto shiftRightRoundPack; 296 if ( shiftCount ) { 297 shiftCount = -shiftCount; 298 sigZ = 299 softfloat_shortShiftLeft128( sigZ.v64, sigZ.v0, shiftCount ); 300 x128 = softfloat_shortShiftLeft128( 0, sigZExtra, shiftCount ); 301 sigZ.v0 |= x128.v64; 302 sigZExtra = x128.v0; 303 } 304 goto roundPack; 305 } 306 sigZ: 307 sigZExtra = sig256Z[indexWord( 4, 1 )] | sig256Z[indexWord( 4, 0 )]; 308 shiftRightRoundPack: 309 sigZExtra = (uint64_t) (sigZ.v0<<(64 - shiftCount)) | (sigZExtra != 0); 310 sigZ = softfloat_shortShiftRight128( sigZ.v64, sigZ.v0, shiftCount ); 311 roundPack: 312 return 313 softfloat_roundPackToF128( 314 signZ, expZ - 1, sigZ.v64, sigZ.v0, sigZExtra ); 315 /*------------------------------------------------------------------------ 316 *------------------------------------------------------------------------*/ 317 propagateNaN_ABC: 318 uiZ = softfloat_propagateNaNF128UI( uiA64, uiA0, uiB64, uiB0 ); 319 goto propagateNaN_ZC; 320 /*------------------------------------------------------------------------ 321 *------------------------------------------------------------------------*/ 322 infProdArg: 323 if ( magBits ) { 324 uiZ.v64 = packToF128UI64( signZ, 0x7FFF, 0 ); 325 uiZ.v0 = 0; 326 if ( expC != 0x7FFF ) goto uiZ; 327 if ( sigC.v64 | sigC.v0 ) goto propagateNaN_ZC; 328 if ( signZ == signC ) goto uiZ; 329 } 330 invalid: 331 softfloat_raiseFlags( softfloat_flag_invalid ); 332 uiZ.v64 = defaultNaNF128UI64; 333 uiZ.v0 = defaultNaNF128UI0; 334 propagateNaN_ZC: 335 uiZ = softfloat_propagateNaNF128UI( uiZ.v64, uiZ.v0, uiC64, uiC0 ); 336 goto uiZ; 337 /*------------------------------------------------------------------------ 338 *------------------------------------------------------------------------*/ 339 zeroProd: 340 uiZ.v64 = uiC64; 341 uiZ.v0 = uiC0; 342 if ( ! (expC | sigC.v64 | sigC.v0) && (signZ != signC) ) { 343 completeCancellation: 344 uiZ.v64 = 345 packToF128UI64( 346 softfloat_roundingMode == softfloat_round_min, 0, 0 ); 347 uiZ.v0 = 0; 348 } 349 uiZ: 350 uZ.ui = uiZ; 351 return uZ.f; 352 353 } 354 355