[337] | 1 | /* origin: FreeBSD /usr/src/lib/msun/src/s_cbrtl.c */
|
---|
| 2 | /*-
|
---|
| 3 | * ====================================================
|
---|
| 4 | * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
---|
| 5 | * Copyright (c) 2009-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.
|
---|
| 6 | *
|
---|
| 7 | * Developed at SunPro, a Sun Microsystems, Inc. business.
|
---|
| 8 | * Permission to use, copy, modify, and distribute this
|
---|
| 9 | * software is freely granted, provided that this notice
|
---|
| 10 | * is preserved.
|
---|
| 11 | * ====================================================
|
---|
| 12 | *
|
---|
| 13 | * The argument reduction and testing for exceptional cases was
|
---|
| 14 | * written by Steven G. Kargl with input from Bruce D. Evans
|
---|
| 15 | * and David A. Schultz.
|
---|
| 16 | */
|
---|
| 17 |
|
---|
| 18 | #include "libm.h"
|
---|
| 19 |
|
---|
| 20 | #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
|
---|
| 21 | long double cbrtl(long double x)
|
---|
| 22 | {
|
---|
| 23 | return cbrt(x);
|
---|
| 24 | }
|
---|
| 25 | #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
|
---|
| 26 | static const unsigned B1 = 709958130; /* B1 = (127-127.0/3-0.03306235651)*2**23 */
|
---|
| 27 |
|
---|
| 28 | long double cbrtl(long double x)
|
---|
| 29 | {
|
---|
| 30 | union ldshape u = {x}, v;
|
---|
| 31 | union {float f; uint32_t i;} uft;
|
---|
| 32 | long double r, s, t, w;
|
---|
| 33 | double_t dr, dt, dx;
|
---|
| 34 | float_t ft;
|
---|
| 35 | int e = u.i.se & 0x7fff;
|
---|
| 36 | int sign = u.i.se & 0x8000;
|
---|
| 37 |
|
---|
| 38 | /*
|
---|
| 39 | * If x = +-Inf, then cbrt(x) = +-Inf.
|
---|
| 40 | * If x = NaN, then cbrt(x) = NaN.
|
---|
| 41 | */
|
---|
| 42 | if (e == 0x7fff)
|
---|
| 43 | return x + x;
|
---|
| 44 | if (e == 0) {
|
---|
| 45 | /* Adjust subnormal numbers. */
|
---|
| 46 | u.f *= 0x1p120;
|
---|
| 47 | e = u.i.se & 0x7fff;
|
---|
| 48 | /* If x = +-0, then cbrt(x) = +-0. */
|
---|
| 49 | if (e == 0)
|
---|
| 50 | return x;
|
---|
| 51 | e -= 120;
|
---|
| 52 | }
|
---|
| 53 | e -= 0x3fff;
|
---|
| 54 | u.i.se = 0x3fff;
|
---|
| 55 | x = u.f;
|
---|
| 56 | switch (e % 3) {
|
---|
| 57 | case 1:
|
---|
| 58 | case -2:
|
---|
| 59 | x *= 2;
|
---|
| 60 | e--;
|
---|
| 61 | break;
|
---|
| 62 | case 2:
|
---|
| 63 | case -1:
|
---|
| 64 | x *= 4;
|
---|
| 65 | e -= 2;
|
---|
| 66 | break;
|
---|
| 67 | }
|
---|
| 68 | v.f = 1.0;
|
---|
| 69 | v.i.se = sign | (0x3fff + e/3);
|
---|
| 70 |
|
---|
| 71 | /*
|
---|
| 72 | * The following is the guts of s_cbrtf, with the handling of
|
---|
| 73 | * special values removed and extra care for accuracy not taken,
|
---|
| 74 | * but with most of the extra accuracy not discarded.
|
---|
| 75 | */
|
---|
| 76 |
|
---|
| 77 | /* ~5-bit estimate: */
|
---|
| 78 | uft.f = x;
|
---|
| 79 | uft.i = (uft.i & 0x7fffffff)/3 + B1;
|
---|
| 80 | ft = uft.f;
|
---|
| 81 |
|
---|
| 82 | /* ~16-bit estimate: */
|
---|
| 83 | dx = x;
|
---|
| 84 | dt = ft;
|
---|
| 85 | dr = dt * dt * dt;
|
---|
| 86 | dt = dt * (dx + dx + dr) / (dx + dr + dr);
|
---|
| 87 |
|
---|
| 88 | /* ~47-bit estimate: */
|
---|
| 89 | dr = dt * dt * dt;
|
---|
| 90 | dt = dt * (dx + dx + dr) / (dx + dr + dr);
|
---|
| 91 |
|
---|
| 92 | #if LDBL_MANT_DIG == 64
|
---|
| 93 | /*
|
---|
| 94 | * dt is cbrtl(x) to ~47 bits (after x has been reduced to 1 <= x < 8).
|
---|
| 95 | * Round it away from zero to 32 bits (32 so that t*t is exact, and
|
---|
| 96 | * away from zero for technical reasons).
|
---|
| 97 | */
|
---|
| 98 | t = dt + (0x1.0p32L + 0x1.0p-31L) - 0x1.0p32;
|
---|
| 99 | #elif LDBL_MANT_DIG == 113
|
---|
| 100 | /*
|
---|
| 101 | * Round dt away from zero to 47 bits. Since we don't trust the 47,
|
---|
| 102 | * add 2 47-bit ulps instead of 1 to round up. Rounding is slow and
|
---|
| 103 | * might be avoidable in this case, since on most machines dt will
|
---|
| 104 | * have been evaluated in 53-bit precision and the technical reasons
|
---|
| 105 | * for rounding up might not apply to either case in cbrtl() since
|
---|
| 106 | * dt is much more accurate than needed.
|
---|
| 107 | */
|
---|
| 108 | t = dt + 0x2.0p-46 + 0x1.0p60L - 0x1.0p60;
|
---|
| 109 | #endif
|
---|
| 110 |
|
---|
| 111 | /*
|
---|
| 112 | * Final step Newton iteration to 64 or 113 bits with
|
---|
| 113 | * error < 0.667 ulps
|
---|
| 114 | */
|
---|
| 115 | s = t*t; /* t*t is exact */
|
---|
| 116 | r = x/s; /* error <= 0.5 ulps; |r| < |t| */
|
---|
| 117 | w = t+t; /* t+t is exact */
|
---|
| 118 | r = (r-t)/(w+r); /* r-t is exact; w+r ~= 3*t */
|
---|
| 119 | t = t+t*r; /* error <= 0.5 + 0.5/3 + epsilon */
|
---|
| 120 |
|
---|
| 121 | t *= v.f;
|
---|
| 122 | return t;
|
---|
| 123 | }
|
---|
| 124 | #endif
|
---|