mirror of git://sourceware.org/git/glibc.git
New generic log2f
Similar to the new logf: double precision arithmetic and a small lookup table are used. The argument reduction step is the same as in the new logf. Without wrapper on aarch64: log2f reciprocal-throughput: 2.3x faster; log2f latency: 2.1x faster; old worst case error: 1.72 ulp; new worst case error: 0.75 ulp; aarch64 .text size: -252 bytes; aarch64 .rodata size: +244 bytes. * math/Makefile (type-float-routines): Add e_log2f_data. * sysdeps/ieee754/flt-32/e_log2f.c: New implementation. * sysdeps/ieee754/flt-32/e_log2f_data.c: New file. * sysdeps/ieee754/flt-32/math_config.h (__log2f_data): Define. (LOG2F_TABLE_BITS, LOG2F_POLY_ORDER): Define. * sysdeps/i386/fpu/e_log2f_data.c: New file. * sysdeps/ia64/fpu/e_log2f_data.c: New file. * sysdeps/m68k/m680x0/fpu/e_log2f_data.c: New file.
This commit is contained in:
parent
90c42e40d0
commit
875c76c704
11
ChangeLog
11
ChangeLog
|
@ -1,3 +1,14 @@
|
|||
2017-09-29 Szabolcs Nagy <szabolcs.nagy@arm.com>
|
||||
|
||||
* math/Makefile (type-float-routines): Add e_log2f_data.
|
||||
* sysdeps/ieee754/flt-32/e_log2f.c: New implementation.
|
||||
* sysdeps/ieee754/flt-32/e_log2f_data.c: New file.
|
||||
* sysdeps/ieee754/flt-32/math_config.h (__log2f_data): Define.
|
||||
(LOG2F_TABLE_BITS, LOG2F_POLY_ORDER): Define.
|
||||
* sysdeps/i386/fpu/e_log2f_data.c: New file.
|
||||
* sysdeps/ia64/fpu/e_log2f_data.c: New file.
|
||||
* sysdeps/m68k/m680x0/fpu/e_log2f_data.c: New file.
|
||||
|
||||
2017-09-29 Szabolcs Nagy <szabolcs.nagy@arm.com>
|
||||
|
||||
* math/Makefile (type-float-routines): Add e_logf_data.
|
||||
|
|
2
NEWS
2
NEWS
|
@ -14,7 +14,7 @@ Major new features:
|
|||
|
||||
* Optimized x86-64 trunc and truncf for processors with SSE4.1.
|
||||
|
||||
* Optimized generic expf, exp2f, logf.
|
||||
* Optimized generic expf, exp2f, logf, log2f.
|
||||
|
||||
* In order to support faster and safer process termination the malloc API
|
||||
family of functions will no longer print a failure address and stack
|
||||
|
|
|
@ -115,7 +115,8 @@ type-double-routines := branred doasin dosincos halfulp mpa mpatan2 \
|
|||
|
||||
# float support
|
||||
type-float-suffix := f
|
||||
type-float-routines := k_rem_pio2f math_errf e_exp2f_data e_logf_data
|
||||
type-float-routines := k_rem_pio2f math_errf e_exp2f_data e_logf_data \
|
||||
e_log2f_data
|
||||
|
||||
# _Float128 support
|
||||
type-float128-suffix := f128
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
/* Not needed. */
|
|
@ -0,0 +1 @@
|
|||
/* Not needed. */
|
|
@ -1,86 +1,88 @@
|
|||
/* e_logf.c -- float version of e_log.c.
|
||||
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
|
||||
* adapted for log2 by Ulrich Drepper <drepper@cygnus.com>
|
||||
*/
|
||||
/* Single-precision log2 function.
|
||||
Copyright (C) 2017 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
/*
|
||||
* ====================================================
|
||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||
*
|
||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
||||
* Permission to use, copy, modify, and distribute this
|
||||
* software is freely granted, provided that this notice
|
||||
* is preserved.
|
||||
* ====================================================
|
||||
*/
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <math.h>
|
||||
#include <math_private.h>
|
||||
#include <fix-int-fp-convert-zero.h>
|
||||
#include <stdint.h>
|
||||
#include "math_config.h"
|
||||
|
||||
static const float
|
||||
ln2 = 0.69314718055994530942,
|
||||
two25 = 3.355443200e+07, /* 0x4c000000 */
|
||||
Lg1 = 6.6666668653e-01, /* 3F2AAAAB */
|
||||
Lg2 = 4.0000000596e-01, /* 3ECCCCCD */
|
||||
Lg3 = 2.8571429849e-01, /* 3E924925 */
|
||||
Lg4 = 2.2222198546e-01, /* 3E638E29 */
|
||||
Lg5 = 1.8183572590e-01, /* 3E3A3325 */
|
||||
Lg6 = 1.5313838422e-01, /* 3E1CD04F */
|
||||
Lg7 = 1.4798198640e-01; /* 3E178897 */
|
||||
/*
|
||||
LOG2F_TABLE_BITS = 4
|
||||
LOG2F_POLY_ORDER = 4
|
||||
|
||||
static const float zero = 0.0;
|
||||
ULP error: 0.752 (nearest rounding.)
|
||||
Relative error: 1.9 * 2^-26 (before rounding.)
|
||||
*/
|
||||
|
||||
#define N (1 << LOG2F_TABLE_BITS)
|
||||
#define T __log2f_data.tab
|
||||
#define A __log2f_data.poly
|
||||
#define OFF 0x3f330000
|
||||
|
||||
float
|
||||
__ieee754_log2f(float x)
|
||||
__ieee754_log2f (float x)
|
||||
{
|
||||
float hfsq,f,s,z,R,w,t1,t2,dk;
|
||||
int32_t k,ix,i,j;
|
||||
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
|
||||
double_t z, r, r2, p, y, y0, invc, logc;
|
||||
uint32_t ix, iz, top, tmp;
|
||||
int k, i;
|
||||
|
||||
GET_FLOAT_WORD(ix,x);
|
||||
ix = asuint (x);
|
||||
#if WANT_ROUNDING
|
||||
/* Fix sign of zero with downward rounding when x==1. */
|
||||
if (__glibc_unlikely (ix == 0x3f800000))
|
||||
return 0;
|
||||
#endif
|
||||
if (__glibc_unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000))
|
||||
{
|
||||
/* x < 0x1p-126 or inf or nan. */
|
||||
if (ix * 2 == 0)
|
||||
return __math_divzerof (1);
|
||||
if (ix == 0x7f800000) /* log2(inf) == inf. */
|
||||
return x;
|
||||
if ((ix & 0x80000000) || ix * 2 >= 0xff000000)
|
||||
return __math_invalidf (x);
|
||||
/* x is subnormal, normalize it. */
|
||||
ix = asuint (x * 0x1p23f);
|
||||
ix -= 23 << 23;
|
||||
}
|
||||
|
||||
k=0;
|
||||
if (ix < 0x00800000) { /* x < 2**-126 */
|
||||
if (__builtin_expect((ix&0x7fffffff)==0, 0))
|
||||
return -two25/__fabsf (x); /* log(+-0)=-inf */
|
||||
if (__builtin_expect(ix<0, 0))
|
||||
return (x-x)/(x-x); /* log(-#) = NaN */
|
||||
k -= 25; x *= two25; /* subnormal number, scale up x */
|
||||
GET_FLOAT_WORD(ix,x);
|
||||
}
|
||||
if (__builtin_expect(ix >= 0x7f800000, 0)) return x+x;
|
||||
k += (ix>>23)-127;
|
||||
ix &= 0x007fffff;
|
||||
i = (ix+(0x95f64<<3))&0x800000;
|
||||
SET_FLOAT_WORD(x,ix|(i^0x3f800000)); /* normalize x or x/2 */
|
||||
k += (i>>23);
|
||||
dk = (float)k;
|
||||
f = x-(float)1.0;
|
||||
if((0x007fffff&(15+ix))<16) { /* |f| < 2**-20 */
|
||||
if(f==zero)
|
||||
{
|
||||
if (FIX_INT_FP_CONVERT_ZERO && dk == 0.0f)
|
||||
dk = 0.0f;
|
||||
return dk;
|
||||
}
|
||||
R = f*f*((float)0.5-(float)0.33333333333333333*f);
|
||||
return dk-(R-f)/ln2;
|
||||
}
|
||||
s = f/((float)2.0+f);
|
||||
z = s*s;
|
||||
i = ix-(0x6147a<<3);
|
||||
w = z*z;
|
||||
j = (0x6b851<<3)-ix;
|
||||
t1= w*(Lg2+w*(Lg4+w*Lg6));
|
||||
t2= z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7)));
|
||||
i |= j;
|
||||
R = t2+t1;
|
||||
if(i>0) {
|
||||
hfsq=(float)0.5*f*f;
|
||||
return dk-((hfsq-(s*(hfsq+R)))-f)/ln2;
|
||||
} else {
|
||||
return dk-((s*(f-R))-f)/ln2;
|
||||
}
|
||||
/* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
|
||||
The range is split into N subintervals.
|
||||
The ith subinterval contains z and c is near its center. */
|
||||
tmp = ix - OFF;
|
||||
i = (tmp >> (23 - LOG2F_TABLE_BITS)) % N;
|
||||
top = tmp & 0xff800000;
|
||||
iz = ix - top;
|
||||
k = (int32_t) tmp >> 23; /* arithmetic shift */
|
||||
invc = T[i].invc;
|
||||
logc = T[i].logc;
|
||||
z = (double_t) asfloat (iz);
|
||||
|
||||
/* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
|
||||
r = z * invc - 1;
|
||||
y0 = logc + (double_t) k;
|
||||
|
||||
/* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */
|
||||
r2 = r * r;
|
||||
y = A[1] * r + A[2];
|
||||
y = A[0] * r2 + y;
|
||||
p = A[3] * r + y0;
|
||||
y = y * r2 + p;
|
||||
return (float) y;
|
||||
}
|
||||
strong_alias (__ieee754_log2f, __log2f_finite)
|
||||
|
|
|
@ -0,0 +1,44 @@
|
|||
/* Data definition for log2f.
|
||||
Copyright (C) 2017 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "math_config.h"
|
||||
|
||||
const struct log2f_data __log2f_data = {
|
||||
.tab = {
|
||||
{ 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 },
|
||||
{ 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 },
|
||||
{ 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 },
|
||||
{ 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 },
|
||||
{ 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 },
|
||||
{ 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 },
|
||||
{ 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 },
|
||||
{ 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 },
|
||||
{ 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 },
|
||||
{ 0x1p+0, 0x0p+0 },
|
||||
{ 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 },
|
||||
{ 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 },
|
||||
{ 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 },
|
||||
{ 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 },
|
||||
{ 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 },
|
||||
{ 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 },
|
||||
},
|
||||
.poly = {
|
||||
-0x1.712b6f70a7e4dp-2, 0x1.ecabf496832ep-2, -0x1.715479ffae3dep-1,
|
||||
0x1.715475f35c8b8p0,
|
||||
}
|
||||
};
|
|
@ -123,4 +123,15 @@ extern const struct logf_data
|
|||
double poly[LOGF_POLY_ORDER - 1]; /* First order coefficient is 1. */
|
||||
} __logf_data attribute_hidden;
|
||||
|
||||
#define LOG2F_TABLE_BITS 4
|
||||
#define LOG2F_POLY_ORDER 4
|
||||
extern const struct log2f_data
|
||||
{
|
||||
struct
|
||||
{
|
||||
double invc, logc;
|
||||
} tab[1 << LOG2F_TABLE_BITS];
|
||||
double poly[LOG2F_POLY_ORDER];
|
||||
} __log2f_data attribute_hidden;
|
||||
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
/* Not needed. */
|
Loading…
Reference in New Issue