diff options
| author | H.J. Lu <hjl.tools@gmail.com> | 2025-06-09 05:22:10 +0800 |
|---|---|---|
| committer | H.J. Lu <hjl.tools@gmail.com> | 2025-06-19 04:30:31 +0800 |
| commit | 848f0e46f03f22404ed9a8aabf3fd5ce8809a1be (patch) | |
| tree | 5f5ef1adaac3736e7e426a03b463f7b2e58428df /sysdeps/x86 | |
| parent | abc2e954af77f8d10f4f54754520814590e79830 (diff) | |
i386: Update ___tls_get_addr to preserve vector registers
The compiler generates the following instruction sequence for dynamic TLS
access:
leal tls_var@tlsgd(,%ebx,1), %eax
call ___tls_get_addr@PLT
The CALL instruction is transparent to the compiler, which assumes that all
registers, except for EFLAGS, AX, CX, and DX, are unchanged after the CALL.
But ___tls_get_addr is a normal function which doesn't preserve any vector
registers.
1. Rename the generic __tls_get_addr function to ___tls_get_addr_internal.
2. Change ___tls_get_addr to a wrapper function with implementations for
FNSAVE, FXSAVE, XSAVE and XSAVEC to save and restore all vector registers.
3. dl-tlsdesc-dynamic.h has:
_dl_tlsdesc_dynamic:
/* Like all TLS resolvers, preserve call-clobbered registers.
We need two scratch regs anyway. */
subl $32, %esp
cfi_adjust_cfa_offset (32)
It is wrong to use
movl %ebx, -28(%esp)
movl %esp, %ebx
cfi_def_cfa_register(%ebx)
...
mov %ebx, %esp
cfi_def_cfa_register(%esp)
movl -28(%esp), %ebx
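to preserve EBX on the stack, because after the subl -28(%esp) lies below
the stack pointer, where the saved value can be overwritten. Fix it with: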
movl %ebx, 28(%esp)
movl %esp, %ebx
cfi_def_cfa_register(%ebx)
...
mov %ebx, %esp
cfi_def_cfa_register(%esp)
movl 28(%esp), %ebx
4. Update _dl_tlsdesc_dynamic to call ___tls_get_addr_internal directly.
5. Add have-test-mtls-traditional to compile tst-tls23-mod.c with
traditional TLS variant to verify the fix.
6. Define DL_RUNTIME_RESOLVE_REALIGN_STACK in sysdeps/x86/sysdep.h.
This fixes BZ #32996.
Co-Authored-By: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Signed-off-by: H.J. Lu <hjl.tools@gmail.com>
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Diffstat (limited to 'sysdeps/x86')
| -rw-r--r-- | sysdeps/x86/Makefile | 16 | ||||
| -rw-r--r-- | sysdeps/x86/sysdep.h | 23 | ||||
| -rw-r--r-- | sysdeps/x86/tst-tls23.c | 22 | ||||
| -rw-r--r-- | sysdeps/x86/tst-tls23.h | 35 |
4 files changed, 95 insertions, 1 deletions
diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile index 01b0192ddf..f64cee3cd9 100644 --- a/sysdeps/x86/Makefile +++ b/sysdeps/x86/Makefile @@ -4,7 +4,13 @@ endif ifeq ($(subdir),elf) sysdep_routines += get-cpuid-feature-leaf -sysdep-dl-routines += dl-get-cpu-features +sysdep-dl-routines += \ + dl-get-cpu-features \ + dl-tlsdesc \ + tls_get_addr \ + tlsdesc \ +# sysdep-dl-routines + sysdep_headers += \ bits/platform/features.h \ bits/platform/x86.h \ @@ -113,6 +119,14 @@ $(objpfx)tst-gnu2-tls2-x86-noxsavexsavec.out: \ $(objpfx)tst-gnu2-tls2mod0.so \ $(objpfx)tst-gnu2-tls2mod1.so \ $(objpfx)tst-gnu2-tls2mod2.so + +CFLAGS-tst-tls23.c += -msse2 +CFLAGS-tst-tls23-mod.c += -msse2 -mtune=haswell + +LDFLAGS-tst-tls23 += -rdynamic +tst-tls23-mod.so-no-z-defs = yes + +$(objpfx)tst-tls23-mod.so: $(libsupport) endif ifeq ($(subdir),math) diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h index c3c73e75dd..b8e963b654 100644 --- a/sysdeps/x86/sysdep.h +++ b/sysdeps/x86/sysdep.h @@ -183,6 +183,29 @@ #define atom_text_section .section ".text.atom", "ax" +#ifndef DL_STACK_ALIGNMENT +/* Due to GCC bug: + + https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066 + + __tls_get_addr may be called with 8-byte/4-byte stack alignment. + Although this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't + assume that stack will be always aligned at 16 bytes. */ +# ifdef __x86_64__ +# define DL_STACK_ALIGNMENT 8 +# define MINIMUM_ALIGNMENT 16 +# else +# define DL_STACK_ALIGNMENT 4 +# endif +#endif + +/* True if _dl_runtime_resolve/_dl_tlsdesc_dynamic should align stack for + STATE_SAVE or align stack to MINIMUM_ALIGNMENT bytes before calling + _dl_fixup/__tls_get_addr. 
*/ +#define DL_RUNTIME_RESOLVE_REALIGN_STACK \ + (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \ + || MINIMUM_ALIGNMENT > DL_STACK_ALIGNMENT) + #endif /* __ASSEMBLER__ */ #endif /* _X86_SYSDEP_H */ diff --git a/sysdeps/x86/tst-tls23.c b/sysdeps/x86/tst-tls23.c new file mode 100644 index 0000000000..6130d91cf8 --- /dev/null +++ b/sysdeps/x86/tst-tls23.c @@ -0,0 +1,22 @@ +#ifndef __x86_64__ +#include <sys/platform/x86.h> + +#define IS_SUPPORTED() CPU_FEATURE_ACTIVE (SSE2) +#endif + +/* Set XMM0...XMM7 to all 1s. */ +#define PREPARE_MALLOC() \ +{ \ + asm volatile ("pcmpeqd %%xmm0, %%xmm0" : : : "xmm0" ); \ + asm volatile ("pcmpeqd %%xmm1, %%xmm1" : : : "xmm1" ); \ + asm volatile ("pcmpeqd %%xmm2, %%xmm2" : : : "xmm2" ); \ + asm volatile ("pcmpeqd %%xmm3, %%xmm3" : : : "xmm3" ); \ + asm volatile ("pcmpeqd %%xmm4, %%xmm4" : : : "xmm4" ); \ + asm volatile ("pcmpeqd %%xmm5, %%xmm5" : : : "xmm5" ); \ + asm volatile ("pcmpeqd %%xmm6, %%xmm6" : : : "xmm6" ); \ + asm volatile ("pcmpeqd %%xmm7, %%xmm7" : : : "xmm7" ); \ +} + +#include <elf/tst-tls23.c> + +v2di v1, v2, v3; diff --git a/sysdeps/x86/tst-tls23.h b/sysdeps/x86/tst-tls23.h new file mode 100644 index 0000000000..21cee4ca07 --- /dev/null +++ b/sysdeps/x86/tst-tls23.h @@ -0,0 +1,35 @@ +/* Test that __tls_get_addr preserves XMM registers. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <support/check.h> + +typedef long long v2di __attribute__((vector_size(16))); +extern v2di v1, v2, v3; + +#define BEFORE_TLS_CALL() \ + v1 = __extension__(v2di){0, 0}; \ + v2 = __extension__(v2di){0, 0}; + +#define AFTER_TLS_CALL() \ + v3 = __extension__(v2di){0, 0}; \ + asm volatile ("" : "+x" (v3)); \ + union { v2di x; long long a[2]; } u; \ + u.x = v3; \ + TEST_VERIFY_EXIT (u.a[0] == 0 && u.a[1] == 0); + +#include <elf/tst-tls23.h> |
