mirror of git://sourceware.org/git/glibc.git
This patch adds a new function, libc_feholdsetround_noex_aarch64_ctx, enabling
further optimization. libc_feholdsetround_aarch64_ctx now only needs to read the FPCR in the typical case, avoiding a redundant FPSR read. Performance results show a good improvement (5-10% on sin()) on cores with expensive FPCR/FPSR instructions.
This commit is contained in:
parent
538e9e454d
commit
656b84c2ef
|
|
@ -1,3 +1,8 @@
|
||||||
|
2014-08-07 Wilco Dijkstra <wdijkstr@arm.com>
|
||||||
|
|
||||||
|
* sysdeps/aarch64/fpu/math_private.h
|
||||||
|
(libc_feholdsetround_noex_aarch64_ctx): New function.
|
||||||
|
|
||||||
2014-08-07 Wilco Dijkstra <wdijkstr@arm.com>
|
2014-08-07 Wilco Dijkstra <wdijkstr@arm.com>
|
||||||
|
|
||||||
* sysdeps/arm/armv6/strcpy.S (strcpy):
|
* sysdeps/arm/armv6/strcpy.S (strcpy):
|
||||||
|
|
|
||||||
|
|
@ -228,12 +228,9 @@ static __always_inline void
|
||||||
libc_feholdsetround_aarch64_ctx (struct rm_ctx *ctx, int r)
|
libc_feholdsetround_aarch64_ctx (struct rm_ctx *ctx, int r)
|
||||||
{
|
{
|
||||||
fpu_control_t fpcr;
|
fpu_control_t fpcr;
|
||||||
fpu_fpsr_t fpsr;
|
|
||||||
int round;
|
int round;
|
||||||
|
|
||||||
_FPU_GETCW (fpcr);
|
_FPU_GETCW (fpcr);
|
||||||
_FPU_GETFPSR (fpsr);
|
|
||||||
ctx->env.__fpsr = fpsr;
|
|
||||||
|
|
||||||
/* Check whether rounding modes are different. */
|
/* Check whether rounding modes are different. */
|
||||||
round = (fpcr ^ r) & _FPU_FPCR_RM_MASK;
|
round = (fpcr ^ r) & _FPU_FPCR_RM_MASK;
|
||||||
|
|
@ -263,6 +260,33 @@ libc_feresetround_aarch64_ctx (struct rm_ctx *ctx)
|
||||||
#define libc_feresetroundf_ctx libc_feresetround_aarch64_ctx
|
#define libc_feresetroundf_ctx libc_feresetround_aarch64_ctx
|
||||||
#define libc_feresetroundl_ctx libc_feresetround_aarch64_ctx
|
#define libc_feresetroundl_ctx libc_feresetround_aarch64_ctx
|
||||||
|
|
||||||
|
static __always_inline void
|
||||||
|
libc_feholdsetround_noex_aarch64_ctx (struct rm_ctx *ctx, int r)
|
||||||
|
{
|
||||||
|
fpu_control_t fpcr;
|
||||||
|
fpu_fpsr_t fpsr;
|
||||||
|
int round;
|
||||||
|
|
||||||
|
_FPU_GETCW (fpcr);
|
||||||
|
_FPU_GETFPSR (fpsr);
|
||||||
|
ctx->env.__fpsr = fpsr;
|
||||||
|
|
||||||
|
/* Check whether rounding modes are different. */
|
||||||
|
round = (fpcr ^ r) & _FPU_FPCR_RM_MASK;
|
||||||
|
ctx->updated_status = round != 0;
|
||||||
|
|
||||||
|
/* Set the rounding mode if changed. */
|
||||||
|
if (__glibc_unlikely (round != 0))
|
||||||
|
{
|
||||||
|
ctx->env.__fpcr = fpcr;
|
||||||
|
_FPU_SETCW (fpcr ^ round);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#define libc_feholdsetround_noex_ctx libc_feholdsetround_noex_aarch64_ctx
|
||||||
|
#define libc_feholdsetround_noexf_ctx libc_feholdsetround_noex_aarch64_ctx
|
||||||
|
#define libc_feholdsetround_noexl_ctx libc_feholdsetround_noex_aarch64_ctx
|
||||||
|
|
||||||
static __always_inline void
|
static __always_inline void
|
||||||
libc_feresetround_noex_aarch64_ctx (struct rm_ctx *ctx)
|
libc_feresetround_noex_aarch64_ctx (struct rm_ctx *ctx)
|
||||||
{
|
{
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue