/* origin: FreeBSD /usr/src/lib/msun/src/s_fmaf.c */
/*-
 * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
|
---|
| 27 |
|
---|
| 28 | #include <fenv.h>
|
---|
| 29 | #include <math.h>
|
---|
| 30 | #include <stdint.h>
|
---|
| 31 |
|
---|
| 32 | /*
|
---|
| 33 | * Fused multiply-add: Compute x * y + z with a single rounding error.
|
---|
| 34 | *
|
---|
| 35 | * A double has more than twice as much precision than a float, so
|
---|
| 36 | * direct double-precision arithmetic suffices, except where double
|
---|
| 37 | * rounding occurs.
|
---|
| 38 | */
|
---|
| 39 | float fmaf(float x, float y, float z)
|
---|
| 40 | {
|
---|
| 41 | #pragma STDC FENV_ACCESS ON
|
---|
| 42 | double xy, result;
|
---|
| 43 | union {double f; uint64_t i;} u;
|
---|
| 44 | int e;
|
---|
| 45 |
|
---|
| 46 | xy = (double)x * y;
|
---|
| 47 | result = xy + z;
|
---|
| 48 | u.f = result;
|
---|
| 49 | e = u.i>>52 & 0x7ff;
|
---|
| 50 | /* Common case: The double precision result is fine. */
|
---|
| 51 | if ((u.i & 0x1fffffff) != 0x10000000 || /* not a halfway case */
|
---|
| 52 | e == 0x7ff || /* NaN */
|
---|
| 53 | result - xy == z || /* exact */
|
---|
| 54 | fegetround() != FE_TONEAREST) /* not round-to-nearest */
|
---|
| 55 | {
|
---|
| 56 | /*
|
---|
| 57 | underflow may not be raised correctly, example:
|
---|
| 58 | fmaf(0x1p-120f, 0x1p-120f, 0x1p-149f)
|
---|
| 59 | */
|
---|
| 60 | #if defined(FE_INEXACT) && defined(FE_UNDERFLOW)
|
---|
| 61 | if (e < 0x3ff-126 && e >= 0x3ff-149 && fetestexcept(FE_INEXACT)) {
|
---|
| 62 | feclearexcept(FE_INEXACT);
|
---|
| 63 | /* TODO: gcc and clang bug workaround */
|
---|
| 64 | volatile float vz = z;
|
---|
| 65 | result = xy + vz;
|
---|
| 66 | if (fetestexcept(FE_INEXACT))
|
---|
| 67 | feraiseexcept(FE_UNDERFLOW);
|
---|
| 68 | else
|
---|
| 69 | feraiseexcept(FE_INEXACT);
|
---|
| 70 | }
|
---|
| 71 | #endif
|
---|
| 72 | z = result;
|
---|
| 73 | return z;
|
---|
| 74 | }
|
---|
| 75 |
|
---|
| 76 | /*
|
---|
| 77 | * If result is inexact, and exactly halfway between two float values,
|
---|
| 78 | * we need to adjust the low-order bit in the direction of the error.
|
---|
| 79 | */
|
---|
| 80 | #ifdef FE_TOWARDZERO
|
---|
| 81 | fesetround(FE_TOWARDZERO);
|
---|
| 82 | #endif
|
---|
| 83 | volatile double vxy = xy; /* XXX work around gcc CSE bug */
|
---|
| 84 | double adjusted_result = vxy + z;
|
---|
| 85 | fesetround(FE_TONEAREST);
|
---|
| 86 | if (result == adjusted_result) {
|
---|
| 87 | u.f = adjusted_result;
|
---|
| 88 | u.i++;
|
---|
| 89 | adjusted_result = u.f;
|
---|
| 90 | }
|
---|
| 91 | z = adjusted_result;
|
---|
| 92 | return z;
|
---|
| 93 | }
|
---|