1 /* SPDX-License-Identifier: BSD-3-Clause */
2 
3 /*============================================================================
4 
5 This C header file is part of the SoftFloat IEEE Floating-Point Arithmetic
6 Package, Release 3a, by John R. Hauser.
7 
8 Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
9 All rights reserved.
10 
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13 
14  1. Redistributions of source code must retain the above copyright notice,
15     this list of conditions, and the following disclaimer.
16 
17  2. Redistributions in binary form must reproduce the above copyright notice,
18     this list of conditions, and the following disclaimer in the documentation
19     and/or other materials provided with the distribution.
20 
21  3. Neither the name of the University nor the names of its contributors may
22     be used to endorse or promote products derived from this software without
23     specific prior written permission.
24 
25 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
26 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
27 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
28 DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
29 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
30 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
31 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
32 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
33 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
34 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 
36 =============================================================================*/
37 
38 
39 /*============================================================================
40 | Note:  If SoftFloat is made available as a general library for programs to
41 | use, it is strongly recommended that a platform-specific version of this
42 | header, "softfloat.h", be created that folds in "softfloat_types.h" and that
43 | eliminates all dependencies on compile-time macros.
44 *============================================================================*/
45 
46 
47 #ifndef softfloat_h
48 #define softfloat_h 1
49 
50 #include <stdbool.h>
51 #include <stdint.h>
52 
53 #include "softfloat_types.h"
54 
/*----------------------------------------------------------------------------
| Underflow tininess-detection mode.
| The enumerators name the two modes (0 = tininess detected before rounding,
| 1 = after rounding).  The global declared below them is the mode setting;
| it is intended to hold one of these two values.
*----------------------------------------------------------------------------*/
enum {
    softfloat_tininess_beforeRounding = 0,
    softfloat_tininess_afterRounding            /* = 1 */
};
extern uint_fast8_t softfloat_detectTininess;
63 
/*----------------------------------------------------------------------------
| Rounding mode.
| The enumerators are numbered consecutively from 0 to 4.  The global declared
| below them is the rounding-mode setting; it is intended to hold one of these
| values.
*----------------------------------------------------------------------------*/
enum {
    softfloat_round_near_even = 0,
    softfloat_round_minMag,                     /* = 1 */
    softfloat_round_min,                        /* = 2 */
    softfloat_round_max,                        /* = 3 */
    softfloat_round_near_maxMag                 /* = 4 */
};
extern uint_fast8_t softfloat_roundingMode;
75 
/*----------------------------------------------------------------------------
| Exception flags.
| Each flag is a distinct single bit (values 1, 2, 4, 8, and 16), so several
| flags fit together in the one 'uint_fast8_t' global declared below them.
*----------------------------------------------------------------------------*/
enum {
    softfloat_flag_inexact   = 1 << 0,          /* =  1 */
    softfloat_flag_underflow = 1 << 1,          /* =  2 */
    softfloat_flag_overflow  = 1 << 2,          /* =  4 */
    softfloat_flag_infinite  = 1 << 3,          /* =  8 */
    softfloat_flag_invalid   = 1 << 4           /* = 16 */
};
extern uint_fast8_t softfloat_exceptionFlags;
87 
/*----------------------------------------------------------------------------
| Raises any or all of the software floating-point exception flags.
| The argument is presumably a bitwise OR of 'softfloat_flag_*' values
| (confirm against the routine's definition, which is not in this header).
*----------------------------------------------------------------------------*/
void softfloat_raiseFlags(uint_fast8_t);
92 
93 /*----------------------------------------------------------------------------
94 | Integer-to-floating-point conversion routines.
95 *----------------------------------------------------------------------------*/
96 float32_t ui32_to_f32( uint32_t );
97 float64_t ui32_to_f64( uint32_t );
98 #ifdef SOFTFLOAT_FAST_INT64
99 extFloat80_t ui32_to_extF80( uint32_t );
100 float128_t ui32_to_f128( uint32_t );
101 #endif
102 void ui32_to_extF80M( uint32_t, extFloat80_t * );
103 void ui32_to_f128M( uint32_t, float128_t * );
104 float32_t ui64_to_f32( uint64_t );
105 float64_t ui64_to_f64( uint64_t );
106 #ifdef SOFTFLOAT_FAST_INT64
107 extFloat80_t ui64_to_extF80( uint64_t );
108 float128_t ui64_to_f128( uint64_t );
109 #endif
110 void ui64_to_extF80M( uint64_t, extFloat80_t * );
111 void ui64_to_f128M( uint64_t, float128_t * );
112 float32_t i32_to_f32( int32_t );
113 float64_t i32_to_f64( int32_t );
114 #ifdef SOFTFLOAT_FAST_INT64
115 extFloat80_t i32_to_extF80( int32_t );
116 float128_t i32_to_f128( int32_t );
117 #endif
118 void i32_to_extF80M( int32_t, extFloat80_t * );
119 void i32_to_f128M( int32_t, float128_t * );
120 float32_t i64_to_f32( int64_t );
121 float64_t i64_to_f64( int64_t );
122 #ifdef SOFTFLOAT_FAST_INT64
123 extFloat80_t i64_to_extF80( int64_t );
124 float128_t i64_to_f128( int64_t );
125 #endif
126 void i64_to_extF80M( int64_t, extFloat80_t * );
127 void i64_to_f128M( int64_t, float128_t * );
128 
129 /*----------------------------------------------------------------------------
130 | 32-bit (single-precision) floating-point operations.
131 *----------------------------------------------------------------------------*/
132 uint_fast32_t f32_to_ui32( float32_t, uint_fast8_t, bool );
133 uint_fast64_t f32_to_ui64( float32_t, uint_fast8_t, bool );
134 int_fast32_t f32_to_i32( float32_t, uint_fast8_t, bool );
135 int_fast64_t f32_to_i64( float32_t, uint_fast8_t, bool );
136 uint_fast32_t f32_to_ui32_r_minMag( float32_t, bool );
137 uint_fast64_t f32_to_ui64_r_minMag( float32_t, bool );
138 int_fast32_t f32_to_i32_r_minMag( float32_t, bool );
139 int_fast64_t f32_to_i64_r_minMag( float32_t, bool );
140 float64_t f32_to_f64( float32_t );
141 #ifdef SOFTFLOAT_FAST_INT64
142 extFloat80_t f32_to_extF80( float32_t );
143 float128_t f32_to_f128( float32_t );
144 #endif
145 void f32_to_extF80M( float32_t, extFloat80_t * );
146 void f32_to_f128M( float32_t, float128_t * );
147 float32_t f32_roundToInt( float32_t, uint_fast8_t, bool );
148 float32_t f32_add( float32_t, float32_t );
149 float32_t f32_sub( float32_t, float32_t );
150 float32_t f32_mul( float32_t, float32_t );
151 float32_t f32_mulAdd( float32_t, float32_t, float32_t );
152 float32_t f32_div( float32_t, float32_t );
153 float32_t f32_rem( float32_t, float32_t );
154 float32_t f32_sqrt( float32_t );
155 bool f32_eq( float32_t, float32_t );
156 bool f32_le( float32_t, float32_t );
157 bool f32_lt( float32_t, float32_t );
158 bool f32_eq_signaling( float32_t, float32_t );
159 bool f32_le_quiet( float32_t, float32_t );
160 bool f32_lt_quiet( float32_t, float32_t );
161 bool f32_isSignalingNaN( float32_t );
162 
163 /*----------------------------------------------------------------------------
164 | 64-bit (double-precision) floating-point operations.
165 *----------------------------------------------------------------------------*/
166 uint_fast32_t f64_to_ui32( float64_t, uint_fast8_t, bool );
167 uint_fast64_t f64_to_ui64( float64_t, uint_fast8_t, bool );
168 int_fast32_t f64_to_i32( float64_t, uint_fast8_t, bool );
169 int_fast64_t f64_to_i64( float64_t, uint_fast8_t, bool );
170 uint_fast32_t f64_to_ui32_r_minMag( float64_t, bool );
171 uint_fast64_t f64_to_ui64_r_minMag( float64_t, bool );
172 int_fast32_t f64_to_i32_r_minMag( float64_t, bool );
173 int_fast64_t f64_to_i64_r_minMag( float64_t, bool );
174 float32_t f64_to_f32( float64_t );
175 #ifdef SOFTFLOAT_FAST_INT64
176 extFloat80_t f64_to_extF80( float64_t );
177 float128_t f64_to_f128( float64_t );
178 #endif
179 void f64_to_extF80M( float64_t, extFloat80_t * );
180 void f64_to_f128M( float64_t, float128_t * );
181 float64_t f64_roundToInt( float64_t, uint_fast8_t, bool );
182 float64_t f64_add( float64_t, float64_t );
183 float64_t f64_sub( float64_t, float64_t );
184 float64_t f64_mul( float64_t, float64_t );
185 float64_t f64_mulAdd( float64_t, float64_t, float64_t );
186 float64_t f64_div( float64_t, float64_t );
187 float64_t f64_rem( float64_t, float64_t );
188 float64_t f64_sqrt( float64_t );
189 bool f64_eq( float64_t, float64_t );
190 bool f64_le( float64_t, float64_t );
191 bool f64_lt( float64_t, float64_t );
192 bool f64_eq_signaling( float64_t, float64_t );
193 bool f64_le_quiet( float64_t, float64_t );
194 bool f64_lt_quiet( float64_t, float64_t );
195 bool f64_isSignalingNaN( float64_t );
196 
/*----------------------------------------------------------------------------
| Rounding-precision setting for 80-bit extended double-precision
| floating-point.  The only valid values are 32, 64, and 80.
*----------------------------------------------------------------------------*/
extern uint_fast8_t extF80_roundingPrecision;
202 
203 /*----------------------------------------------------------------------------
204 | 80-bit extended double-precision floating-point operations.
205 *----------------------------------------------------------------------------*/
206 #ifdef SOFTFLOAT_FAST_INT64
207 uint_fast32_t extF80_to_ui32( extFloat80_t, uint_fast8_t, bool );
208 uint_fast64_t extF80_to_ui64( extFloat80_t, uint_fast8_t, bool );
209 int_fast32_t extF80_to_i32( extFloat80_t, uint_fast8_t, bool );
210 int_fast64_t extF80_to_i64( extFloat80_t, uint_fast8_t, bool );
211 uint_fast32_t extF80_to_ui32_r_minMag( extFloat80_t, bool );
212 uint_fast64_t extF80_to_ui64_r_minMag( extFloat80_t, bool );
213 int_fast32_t extF80_to_i32_r_minMag( extFloat80_t, bool );
214 int_fast64_t extF80_to_i64_r_minMag( extFloat80_t, bool );
215 float32_t extF80_to_f32( extFloat80_t );
216 float64_t extF80_to_f64( extFloat80_t );
217 float128_t extF80_to_f128( extFloat80_t );
218 extFloat80_t extF80_roundToInt( extFloat80_t, uint_fast8_t, bool );
219 extFloat80_t extF80_add( extFloat80_t, extFloat80_t );
220 extFloat80_t extF80_sub( extFloat80_t, extFloat80_t );
221 extFloat80_t extF80_mul( extFloat80_t, extFloat80_t );
222 extFloat80_t extF80_div( extFloat80_t, extFloat80_t );
223 extFloat80_t extF80_rem( extFloat80_t, extFloat80_t );
224 extFloat80_t extF80_sqrt( extFloat80_t );
225 bool extF80_eq( extFloat80_t, extFloat80_t );
226 bool extF80_le( extFloat80_t, extFloat80_t );
227 bool extF80_lt( extFloat80_t, extFloat80_t );
228 bool extF80_eq_signaling( extFloat80_t, extFloat80_t );
229 bool extF80_le_quiet( extFloat80_t, extFloat80_t );
230 bool extF80_lt_quiet( extFloat80_t, extFloat80_t );
231 bool extF80_isSignalingNaN( extFloat80_t );
232 #endif
233 uint_fast32_t extF80M_to_ui32( const extFloat80_t *, uint_fast8_t, bool );
234 uint_fast64_t extF80M_to_ui64( const extFloat80_t *, uint_fast8_t, bool );
235 int_fast32_t extF80M_to_i32( const extFloat80_t *, uint_fast8_t, bool );
236 int_fast64_t extF80M_to_i64( const extFloat80_t *, uint_fast8_t, bool );
237 uint_fast32_t extF80M_to_ui32_r_minMag( const extFloat80_t *, bool );
238 uint_fast64_t extF80M_to_ui64_r_minMag( const extFloat80_t *, bool );
239 int_fast32_t extF80M_to_i32_r_minMag( const extFloat80_t *, bool );
240 int_fast64_t extF80M_to_i64_r_minMag( const extFloat80_t *, bool );
241 float32_t extF80M_to_f32( const extFloat80_t * );
242 float64_t extF80M_to_f64( const extFloat80_t * );
243 void extF80M_to_f128M( const extFloat80_t *, float128_t * );
244 void
245  extF80M_roundToInt(
246      const extFloat80_t *, uint_fast8_t, bool, extFloat80_t * );
247 void extF80M_add( const extFloat80_t *, const extFloat80_t *, extFloat80_t * );
248 void extF80M_sub( const extFloat80_t *, const extFloat80_t *, extFloat80_t * );
249 void extF80M_mul( const extFloat80_t *, const extFloat80_t *, extFloat80_t * );
250 void extF80M_div( const extFloat80_t *, const extFloat80_t *, extFloat80_t * );
251 void extF80M_rem( const extFloat80_t *, const extFloat80_t *, extFloat80_t * );
252 void extF80M_sqrt( const extFloat80_t *, extFloat80_t * );
253 bool extF80M_eq( const extFloat80_t *, const extFloat80_t * );
254 bool extF80M_le( const extFloat80_t *, const extFloat80_t * );
255 bool extF80M_lt( const extFloat80_t *, const extFloat80_t * );
256 bool extF80M_eq_signaling( const extFloat80_t *, const extFloat80_t * );
257 bool extF80M_le_quiet( const extFloat80_t *, const extFloat80_t * );
258 bool extF80M_lt_quiet( const extFloat80_t *, const extFloat80_t * );
259 bool extF80M_isSignalingNaN( const extFloat80_t * );
260 
261 /*----------------------------------------------------------------------------
262 | 128-bit (quadruple-precision) floating-point operations.
263 *----------------------------------------------------------------------------*/
264 #ifdef SOFTFLOAT_FAST_INT64
265 uint_fast32_t f128_to_ui32( float128_t, uint_fast8_t, bool );
266 uint_fast64_t f128_to_ui64( float128_t, uint_fast8_t, bool );
267 int_fast32_t f128_to_i32( float128_t, uint_fast8_t, bool );
268 int_fast64_t f128_to_i64( float128_t, uint_fast8_t, bool );
269 uint_fast32_t f128_to_ui32_r_minMag( float128_t, bool );
270 uint_fast64_t f128_to_ui64_r_minMag( float128_t, bool );
271 int_fast32_t f128_to_i32_r_minMag( float128_t, bool );
272 int_fast64_t f128_to_i64_r_minMag( float128_t, bool );
273 float32_t f128_to_f32( float128_t );
274 float64_t f128_to_f64( float128_t );
275 extFloat80_t f128_to_extF80( float128_t );
276 float128_t f128_roundToInt( float128_t, uint_fast8_t, bool );
277 float128_t f128_add( float128_t, float128_t );
278 float128_t f128_sub( float128_t, float128_t );
279 float128_t f128_mul( float128_t, float128_t );
280 float128_t f128_mulAdd( float128_t, float128_t, float128_t );
281 float128_t f128_div( float128_t, float128_t );
282 float128_t f128_rem( float128_t, float128_t );
283 float128_t f128_sqrt( float128_t );
284 bool f128_eq( float128_t, float128_t );
285 bool f128_le( float128_t, float128_t );
286 bool f128_lt( float128_t, float128_t );
287 bool f128_eq_signaling( float128_t, float128_t );
288 bool f128_le_quiet( float128_t, float128_t );
289 bool f128_lt_quiet( float128_t, float128_t );
290 bool f128_isSignalingNaN( float128_t );
291 #endif
292 uint_fast32_t f128M_to_ui32( const float128_t *, uint_fast8_t, bool );
293 uint_fast64_t f128M_to_ui64( const float128_t *, uint_fast8_t, bool );
294 int_fast32_t f128M_to_i32( const float128_t *, uint_fast8_t, bool );
295 int_fast64_t f128M_to_i64( const float128_t *, uint_fast8_t, bool );
296 uint_fast32_t f128M_to_ui32_r_minMag( const float128_t *, bool );
297 uint_fast64_t f128M_to_ui64_r_minMag( const float128_t *, bool );
298 int_fast32_t f128M_to_i32_r_minMag( const float128_t *, bool );
299 int_fast64_t f128M_to_i64_r_minMag( const float128_t *, bool );
300 float32_t f128M_to_f32( const float128_t * );
301 float64_t f128M_to_f64( const float128_t * );
302 void f128M_to_extF80M( const float128_t *, extFloat80_t * );
303 void f128M_roundToInt( const float128_t *, uint_fast8_t, bool, float128_t * );
304 void f128M_add( const float128_t *, const float128_t *, float128_t * );
305 void f128M_sub( const float128_t *, const float128_t *, float128_t * );
306 void f128M_mul( const float128_t *, const float128_t *, float128_t * );
307 void
308  f128M_mulAdd(
309      const float128_t *, const float128_t *, const float128_t *, float128_t *
310  );
311 void f128M_div( const float128_t *, const float128_t *, float128_t * );
312 void f128M_rem( const float128_t *, const float128_t *, float128_t * );
313 void f128M_sqrt( const float128_t *, float128_t * );
314 bool f128M_eq( const float128_t *, const float128_t * );
315 bool f128M_le( const float128_t *, const float128_t * );
316 bool f128M_lt( const float128_t *, const float128_t * );
317 bool f128M_eq_signaling( const float128_t *, const float128_t * );
318 bool f128M_le_quiet( const float128_t *, const float128_t * );
319 bool f128M_lt_quiet( const float128_t *, const float128_t * );
320 bool f128M_isSignalingNaN( const float128_t * );
321 
322 #endif
323 
324