Save/restore bound registers in _dl_runtime_resolve

This patch saves and restores bound registers in symbol lookup for x86-64:

1. Branches without BND prefix clear bound registers.
2. x86-64 passes bounds in bound registers, as specified in the MPX psABI
extension on the hjl/mpx/master branch at

https://github.com/hjl-tools/x86-64-psABI
https://groups.google.com/forum/#!topic/x86-64-abi/KFsB0XTgWYc

Binutils has been updated to create an alternate PLT that adds the BND
prefix when branching to ld.so.

	* config.h.in (HAVE_MPX_SUPPORT): New #undef.
	* sysdeps/x86_64/configure.ac: Set HAVE_MPX_SUPPORT.
	* sysdeps/x86_64/configure: Regenerated.
	* sysdeps/x86_64/dl-trampoline.S (REGISTER_SAVE_AREA): New
	macro.
	(REGISTER_SAVE_RAX): Likewise.
	(REGISTER_SAVE_RCX): Likewise.
	(REGISTER_SAVE_RDX): Likewise.
	(REGISTER_SAVE_RSI): Likewise.
	(REGISTER_SAVE_RDI): Likewise.
	(REGISTER_SAVE_R8): Likewise.
	(REGISTER_SAVE_R9): Likewise.
	(REGISTER_SAVE_BND0): Likewise.
	(REGISTER_SAVE_BND1): Likewise.
	(REGISTER_SAVE_BND2): Likewise.
	(REGISTER_SAVE_BND3): Likewise.
	(_dl_runtime_resolve): Use them.  Save and restore Intel MPX
	bound registers when calling _dl_fixup.
This commit is contained in:
Igor Zamyatin 2014-04-01 10:16:04 -07:00 committed by H.J. Lu
parent 27822ce67f
commit a4c75cfd56
5 changed files with 145 additions and 20 deletions

View File

@ -1,3 +1,24 @@
2014-04-09 Igor Zamyatin <igor.zamyatin@intel.com>
H.J. Lu <hongjiu.lu@intel.com>
* config.h.in (HAVE_MPX_SUPPORT): New #undef.
* sysdeps/x86_64/configure.ac: Set HAVE_MPX_SUPPORT.
* sysdeps/x86_64/configure: Regenerated.
* sysdeps/x86_64/dl-trampoline.S (REGISTER_SAVE_AREA): New
macro.
(REGISTER_SAVE_RAX): Likewise.
(REGISTER_SAVE_RCX): Likewise.
(REGISTER_SAVE_RDX): Likewise.
(REGISTER_SAVE_RSI): Likewise.
(REGISTER_SAVE_RDI): Likewise.
(REGISTER_SAVE_R8): Likewise.
(REGISTER_SAVE_R9): Likewise.
(REGISTER_SAVE_BND0): Likewise.
(REGISTER_SAVE_BND1): Likewise.
(REGISTER_SAVE_BND2): Likewise.
(REGISTER_SAVE_BND3): Likewise.
(_dl_runtime_resolve): Use them. Save and restore Intel MPX
bound registers when calling _dl_fixup.
2014-04-09 Adhemerval Zanella <azanella@linux.vnet.ibm.com> 2014-04-09 Adhemerval Zanella <azanella@linux.vnet.ibm.com>
* bits/string.h (_STRING_ARCH_unaligned): Define it to 0. * bits/string.h (_STRING_ARCH_unaligned): Define it to 0.

View File

@ -104,6 +104,9 @@
/* Define if assembler supports AVX512. */ /* Define if assembler supports AVX512. */
#undef HAVE_AVX512_ASM_SUPPORT #undef HAVE_AVX512_ASM_SUPPORT
/* Define if assembler supports Intel MPX. */
#undef HAVE_MPX_SUPPORT
/* Define if gcc supports FMA4. */ /* Define if gcc supports FMA4. */
#undef HAVE_FMA4_SUPPORT #undef HAVE_FMA4_SUPPORT

View File

@ -222,6 +222,33 @@ $as_echo "$libc_cv_cc_novzeroupper" >&6; }
config_vars="$config_vars config_vars="$config_vars
config-cflags-novzeroupper = $libc_cv_cc_novzeroupper" config-cflags-novzeroupper = $libc_cv_cc_novzeroupper"
# Probe whether the assembler accepts the Intel MPX bndmov instruction.
# The result is cached in libc_cv_asm_mpx (yes/no).
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for Intel MPX support" >&5
$as_echo_n "checking for Intel MPX support... " >&6; }
if ${libc_cv_asm_mpx+:} false; then :
$as_echo_n "(cached) " >&6
else
cat > conftest.s <<\EOF
bndmov %bnd0,(%rsp)
EOF
if { ac_try='${CC-cc} -c $ASFLAGS conftest.s 1>&5'
{ { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
(eval $ac_try) 2>&5
ac_status=$?
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; }; then
libc_cv_asm_mpx=yes
else
libc_cv_asm_mpx=no
fi
rm -f conftest*
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_asm_mpx" >&5
$as_echo "$libc_cv_asm_mpx" >&6; }
# Use `=' for the string comparison: `==' is not a POSIX test operator
# and is rejected by strict shells such as dash.
if test $libc_cv_asm_mpx = yes; then
$as_echo "#define HAVE_MPX_SUPPORT 1" >>confdefs.h
fi
$as_echo "#define PI_STATIC_AND_HIDDEN 1" >>confdefs.h $as_echo "#define PI_STATIC_AND_HIDDEN 1" >>confdefs.h
# work around problem with autoconf and empty lines at the end of files # work around problem with autoconf and empty lines at the end of files

View File

@ -75,6 +75,21 @@ LIBC_TRY_CC_OPTION([-mno-vzeroupper],
]) ])
LIBC_CONFIG_VAR([config-cflags-novzeroupper], [$libc_cv_cc_novzeroupper]) LIBC_CONFIG_VAR([config-cflags-novzeroupper], [$libc_cv_cc_novzeroupper])
dnl Check whether the assembler supports Intel MPX by assembling a single
dnl bndmov instruction.  The result is cached in libc_cv_asm_mpx as yes or no.
AC_CACHE_CHECK(for Intel MPX support, libc_cv_asm_mpx, [dnl
cat > conftest.s <<\EOF
bndmov %bnd0,(%rsp)
EOF
if AC_TRY_COMMAND(${CC-cc} -c $ASFLAGS conftest.s 1>&AS_MESSAGE_LOG_FD); then
libc_cv_asm_mpx=yes
else
libc_cv_asm_mpx=no
fi
rm -f conftest*])
dnl Compare with a single equals sign.  The double form is not a POSIX
dnl test operator and is rejected by strict shells such as dash.
if test $libc_cv_asm_mpx = yes; then
AC_DEFINE(HAVE_MPX_SUPPORT)
fi
dnl It is always possible to access static and hidden symbols in an dnl It is always possible to access static and hidden symbols in an
dnl position independent way. dnl position independent way.
AC_DEFINE(PI_STATIC_AND_HIDDEN) AC_DEFINE(PI_STATIC_AND_HIDDEN)

View File

@ -24,6 +24,30 @@
# error RTLD_SAVESPACE_SSE must be aligned to 32 bytes # error RTLD_SAVESPACE_SSE must be aligned to 32 bytes
#endif #endif
/* Layout of the stack scratch area that holds the parameter-passing
   registers while _dl_fixup is called.  REGISTER_SAVE_AREA is the total
   size in bytes; every other REGISTER_SAVE_* macro is a byte offset
   from the base of that area.  */
#ifndef __ILP32__
/* X86-64: four 16-byte slots for BND0, BND1, BND2 and BND3, followed by
   seven 8-byte slots for RAX, RCX, RDX, RSI, RDI, R8 and R9.  */
# define REGISTER_SAVE_AREA (16 * 4 + 8 * 7)
/* The bound register slots come first so that each one sits at a
   multiple of 16 bytes from the base of the area.  */
# define REGISTER_SAVE_BND0 0
# define REGISTER_SAVE_BND1 (REGISTER_SAVE_BND0 + 16)
# define REGISTER_SAVE_BND2 (REGISTER_SAVE_BND1 + 16)
# define REGISTER_SAVE_BND3 (REGISTER_SAVE_BND2 + 16)
# define REGISTER_SAVE_RAX (REGISTER_SAVE_BND3 + 16)
#else
/* X32: only the seven 8-byte slots for RAX, RCX, RDX, RSI, RDI, R8 and
   R9; no bound registers are saved.  */
# define REGISTER_SAVE_AREA (7 * 8)
# define REGISTER_SAVE_RAX 0
#endif
/* General-purpose register slots, laid out consecutively after RAX.  */
#define REGISTER_SAVE_RCX (REGISTER_SAVE_RAX + 8 * 1)
#define REGISTER_SAVE_RDX (REGISTER_SAVE_RAX + 8 * 2)
#define REGISTER_SAVE_RSI (REGISTER_SAVE_RAX + 8 * 3)
#define REGISTER_SAVE_RDI (REGISTER_SAVE_RAX + 8 * 4)
#define REGISTER_SAVE_R8 (REGISTER_SAVE_RAX + 8 * 5)
#define REGISTER_SAVE_R9 (REGISTER_SAVE_RAX + 8 * 6)
.text .text
.globl _dl_runtime_resolve .globl _dl_runtime_resolve
.type _dl_runtime_resolve, @function .type _dl_runtime_resolve, @function
@ -31,28 +55,63 @@
cfi_startproc cfi_startproc
_dl_runtime_resolve: _dl_runtime_resolve:
cfi_adjust_cfa_offset(16) # Incorporate PLT cfi_adjust_cfa_offset(16) # Incorporate PLT
subq $56,%rsp subq $REGISTER_SAVE_AREA,%rsp
cfi_adjust_cfa_offset(56) cfi_adjust_cfa_offset(REGISTER_SAVE_AREA)
movq %rax,(%rsp) # Preserve registers otherwise clobbered. # Preserve registers otherwise clobbered.
movq %rcx, 8(%rsp) movq %rax, REGISTER_SAVE_RAX(%rsp)
movq %rdx, 16(%rsp) movq %rcx, REGISTER_SAVE_RCX(%rsp)
movq %rsi, 24(%rsp) movq %rdx, REGISTER_SAVE_RDX(%rsp)
movq %rdi, 32(%rsp) movq %rsi, REGISTER_SAVE_RSI(%rsp)
movq %r8, 40(%rsp) movq %rdi, REGISTER_SAVE_RDI(%rsp)
movq %r9, 48(%rsp) movq %r8, REGISTER_SAVE_R8(%rsp)
movq 64(%rsp), %rsi # Copy args pushed by PLT in register. movq %r9, REGISTER_SAVE_R9(%rsp)
movq 56(%rsp), %rdi # %rdi: link_map, %rsi: reloc_index #ifndef __ILP32__
# We also have to preserve bound registers. These are nops if
# Intel MPX isn't available or disabled.
# ifdef HAVE_MPX_SUPPORT
bndmov %bnd0, REGISTER_SAVE_BND0(%rsp)
bndmov %bnd1, REGISTER_SAVE_BND1(%rsp)
bndmov %bnd2, REGISTER_SAVE_BND2(%rsp)
bndmov %bnd3, REGISTER_SAVE_BND3(%rsp)
# else
.byte 0x66,0x0f,0x1b,0x44,0x24,REGISTER_SAVE_BND0
.byte 0x66,0x0f,0x1b,0x4c,0x24,REGISTER_SAVE_BND1
.byte 0x66,0x0f,0x1b,0x54,0x24,REGISTER_SAVE_BND2
.byte 0x66,0x0f,0x1b,0x5c,0x24,REGISTER_SAVE_BND3
# endif
#endif
# Copy args pushed by PLT in register.
# %rdi: link_map, %rsi: reloc_index
movq (REGISTER_SAVE_AREA + 8)(%rsp), %rsi
movq REGISTER_SAVE_AREA(%rsp), %rdi
call _dl_fixup # Call resolver. call _dl_fixup # Call resolver.
movq %rax, %r11 # Save return value movq %rax, %r11 # Save return value
movq 48(%rsp), %r9 # Get register content back. #ifndef __ILP32__
movq 40(%rsp), %r8 # Restore bound registers. These are nops if Intel MPX isn't
movq 32(%rsp), %rdi # available or disabled.
movq 24(%rsp), %rsi # ifdef HAVE_MPX_SUPPORT
movq 16(%rsp), %rdx bndmov REGISTER_SAVE_BND3(%rsp), %bnd3
movq 8(%rsp), %rcx bndmov REGISTER_SAVE_BND2(%rsp), %bnd2
movq (%rsp), %rax bndmov REGISTER_SAVE_BND1(%rsp), %bnd1
addq $72, %rsp # Adjust stack(PLT did 2 pushes) bndmov REGISTER_SAVE_BND0(%rsp), %bnd0
cfi_adjust_cfa_offset(-72) # else
.byte 0x66,0x0f,0x1a,0x5c,0x24,REGISTER_SAVE_BND3
.byte 0x66,0x0f,0x1a,0x54,0x24,REGISTER_SAVE_BND2
.byte 0x66,0x0f,0x1a,0x4c,0x24,REGISTER_SAVE_BND1
.byte 0x66,0x0f,0x1a,0x44,0x24,REGISTER_SAVE_BND0
# endif
#endif
# Get register content back.
movq REGISTER_SAVE_R9(%rsp), %r9
movq REGISTER_SAVE_R8(%rsp), %r8
movq REGISTER_SAVE_RDI(%rsp), %rdi
movq REGISTER_SAVE_RSI(%rsp), %rsi
movq REGISTER_SAVE_RDX(%rsp), %rdx
movq REGISTER_SAVE_RCX(%rsp), %rcx
movq REGISTER_SAVE_RAX(%rsp), %rax
# Adjust stack(PLT did 2 pushes)
addq $(REGISTER_SAVE_AREA + 16), %rsp
cfi_adjust_cfa_offset(-(REGISTER_SAVE_AREA + 16))
jmp *%r11 # Jump to function address. jmp *%r11 # Jump to function address.
cfi_endproc cfi_endproc
.size _dl_runtime_resolve, .-_dl_runtime_resolve .size _dl_runtime_resolve, .-_dl_runtime_resolve