? arch/aarch64/mdreloc.c.save ? arch/aarch64/rtld_start.S.firstattempt Index: arch/aarch64/mdreloc.c =================================================================== RCS file: /cvsroot/src/libexec/ld.elf_so/arch/aarch64/mdreloc.c,v retrieving revision 1.12 diff -u -r1.12 mdreloc.c --- arch/aarch64/mdreloc.c 23 Nov 2018 11:26:05 -0000 1.12 +++ arch/aarch64/mdreloc.c 16 Jan 2019 07:12:01 -0000 @@ -70,17 +70,17 @@ #include "rtld.h" struct tls_data { - int64_t index; - Obj_Entry *obj; - const Elf_Rela *rela; + size_t td_dtv_gen; + size_t td_tlsindex; + Elf_Addr td_tlsoffs; }; void _rtld_bind_start(void); void _rtld_relocate_nonplt_self(Elf_Dyn *, Elf_Addr); Elf_Addr _rtld_bind(const Obj_Entry *, Elf_Word); -void *_rtld_tlsdesc(void *); +void *_rtld_tlsdesc_static(void *); +void *_rtld_tlsdesc_undef(void *); void *_rtld_tlsdesc_dynamic(void *); -int64_t _rtld_tlsdesc_handle(struct tls_data *, u_int); /* * AARCH64 PLT looks like this; @@ -117,67 +117,64 @@ } static struct tls_data * -_rtld_tlsdesc_alloc(Obj_Entry *obj, const Elf_Rela *rela) +_rtld_tlsdesc_alloc(size_t tlsindex, Elf_Addr offs) { struct tls_data *tlsdesc; tlsdesc = xmalloc(sizeof(*tlsdesc)); - tlsdesc->index = -1; - tlsdesc->obj = obj; - tlsdesc->rela = rela; + tlsdesc->td_dtv_gen = _rtld_tls_dtv_generation; + tlsdesc->td_tlsindex = tlsindex; + tlsdesc->td_tlsoffs = offs; return tlsdesc; } -static int64_t -_rtld_tlsdesc_handle_locked(struct tls_data *tlsdesc, u_int flags) +static void +_rtld_tlsdesc_fill(const Obj_Entry *obj, const Elf_Rela *rela, Elf_Addr *where, u_int flags) { - const Elf_Rela *rela; const Elf_Sym *def; const Obj_Entry *defobj; - Obj_Entry *obj; - - rela = tlsdesc->rela; - obj = tlsdesc->obj; - - def = _rtld_find_symdef(ELF_R_SYM(rela->r_info), obj, &defobj, flags); - if (def == NULL) - _rtld_die(); - - tlsdesc->index = defobj->tlsoffset + def->st_value + rela->r_addend + - sizeof(struct tls_tcb); + Elf_Addr offs = 0; + unsigned long symnum = ELF_R_SYM(rela->r_info); - return 
tlsdesc->index; -} - -int64_t -_rtld_tlsdesc_handle(struct tls_data *tlsdesc, u_int flags) -{ - sigset_t mask; + if (symnum != 0) { + def = _rtld_find_symdef(ELF_R_SYM(rela->r_info), obj, &defobj, + flags); + if (def == NULL) + _rtld_die(); + if (def == &_rtld_sym_zero) { + /* Weak undefined thread variable */ + where[0] = (Elf_Addr)_rtld_tlsdesc_undef; + where[1] = rela->r_addend; - /* We have already found the index, return it */ - if (tlsdesc->index >= 0) - return tlsdesc->index; - - _rtld_exclusive_enter(&mask); - /* tlsdesc->index may have been set by another thread */ - if (tlsdesc->index == -1) - _rtld_tlsdesc_handle_locked(tlsdesc, flags); - _rtld_exclusive_exit(&mask); + rdbg(("TLSDESC %s (weak) in %s --> %p", + obj->strtab + obj->symtab[symnum].st_name, + obj->path, (void *)where[1])); - return tlsdesc->index; -} + return; + } + offs = def->st_value; + } else { + defobj = obj; + } + offs += rela->r_addend; -static void -_rtld_tlsdesc_fill(Obj_Entry *obj, const Elf_Rela *rela, Elf_Addr *where) -{ - if (ELF_R_SYM(rela->r_info) == 0) { - where[0] = (Elf_Addr)_rtld_tlsdesc; - where[1] = obj->tlsoffset + rela->r_addend + + if (defobj->tls_done) { + /* Variable is in initially allocated TLS segment */ + where[0] = (Elf_Addr)_rtld_tlsdesc_static; + where[1] = defobj->tlsoffset + offs + sizeof(struct tls_tcb); + + rdbg(("TLSDESC %s --> %p static", + obj->path, (void *)where[1])); } else { + /* TLS offset is unknown at load time, use dynamic resolving */ where[0] = (Elf_Addr)_rtld_tlsdesc_dynamic; - where[1] = (Elf_Addr)_rtld_tlsdesc_alloc(obj, rela); + where[1] = (Elf_Addr)_rtld_tlsdesc_alloc(defobj->tlsindex, offs); + + rdbg(("TLSDESC %s in %s --> %p dynamic (%zu, %p)", + obj->strtab + obj->symtab[symnum].st_name, + obj->path, (void *)where[1], defobj->tlsindex, (void *)offs)); } } @@ -276,7 +273,7 @@ break; case R_TYPE(TLSDESC): - _rtld_tlsdesc_fill(obj, rela, where); + _rtld_tlsdesc_fill(obj, rela, where, 0); break; case R_TLS_TYPE(TLS_DTPREL): @@ -344,7 +341,7
@@ rdbg(("fixup !main in %s --> %p", obj->path, (void *)*where)); break; case R_TYPE(TLSDESC): - _rtld_tlsdesc_fill(obj, rela, where); + _rtld_tlsdesc_fill(obj, rela, where, SYMLOOK_IN_PLT); break; } } @@ -408,11 +405,7 @@ *tp = new_value; break; case R_TYPE(TLSDESC): - if (ELF_R_SYM(rela->r_info) != 0) { - struct tls_data *tlsdesc = (struct tls_data *)where[1]; - if (tlsdesc->index == -1) - _rtld_tlsdesc_handle_locked(tlsdesc, SYMLOOK_IN_PLT); - } + _rtld_tlsdesc_fill(obj, rela, where, SYMLOOK_IN_PLT); break; } Index: arch/aarch64/rtld_start.S =================================================================== RCS file: /cvsroot/src/libexec/ld.elf_so/arch/aarch64/rtld_start.S,v retrieving revision 1.3 diff -u -r1.3 rtld_start.S --- arch/aarch64/rtld_start.S 20 Sep 2018 18:41:05 -0000 1.3 +++ arch/aarch64/rtld_start.S 16 Jan 2019 07:12:01 -0000 @@ -145,47 +145,146 @@ br x17 /* call bound function */ END(_rtld_bind_start) +/* + * struct rel_tlsdesc { + * uint64_t resolver_fnc; + * uint64_t resolver_arg; + * + * + * uint64_t _rtld_tlsdesc_static(struct rel_tlsdesc *); + * + * Resolver function for TLS symbols resolved at load time + */ +ENTRY(_rtld_tlsdesc_static) + .cfi_startproc + ldr x0, [x0, #8] + ret + .cfi_endproc +END(_rtld_tlsdesc_static) + +/* + * uint64_t _rtld_tlsdesc_undef(void); + * + * Resolver function for weak and undefined TLS symbols + */ +ENTRY(_rtld_tlsdesc_undef) + .cfi_startproc + str x1, [sp, #-16]! + .cfi_adjust_cfa_offset 16 -ENTRY(_rtld_tlsdesc) + mrs x1, tpidr_el0 ldr x0, [x0, #8] + sub x0, x0, x1 + + ldr x1, [sp], #16 + .cfi_adjust_cfa_offset -16 + .cfi_endproc ret -END(_rtld_tlsdesc) +END(_rtld_tlsdesc_undef) /* - * uint64_t _rtld_tlsdesc_dynamic(struct tlsdesc *); + * uint64_t _rtld_tlsdesc_dynamic(struct rel_tlsdesc *); * - * TODO: We could lookup the saved index here to skip saving the entire stack. 
+ * Resolver function for TLS symbols from dlopen() */ ENTRY(_rtld_tlsdesc_dynamic) - /* Store any registers we may use in rtld_tlsdesc_handle */ - stp x29, x30, [sp, #-(10 * 16)]! - mov x29, sp - stp x1, x2, [sp, #(1 * 16)] - stp x3, x4, [sp, #(2 * 16)] - stp x5, x6, [sp, #(3 * 16)] - stp x7, x8, [sp, #(4 * 16)] - stp x9, x10, [sp, #(5 * 16)] - stp x11, x12, [sp, #(6 * 16)] - stp x13, x14, [sp, #(7 * 16)] - stp x15, x16, [sp, #(8 * 16)] - stp x17, x18, [sp, #(9 * 16)] + .cfi_startproc + + /* Save registers used in fast path */ + stp x1, x2, [sp, #(-2 * 16)]! + stp x3, x4, [sp, #(1 * 16)] + .cfi_adjust_cfa_offset 2 * 16 + .cfi_rel_offset x1, 0 + .cfi_rel_offset x2, 8 + .cfi_rel_offset x3, 16 + .cfi_rel_offset x4, 24 + + /* Test fastpath - inlined version of __tls_get_addr. */ + + ldr x1, [x0, #8] /* tlsdesc ptr */ + mrs x4, tpidr_el0 + ldr x0, [x4] /* DTV pointer (tcb->tcb_dtv) */ + + ldr x3, [x0, #-8] /* DTV_MAX_INDEX(dtv) */ + ldr x2, [x1, #8] /* tlsdesc->td_tlsindex */ + cmp x2, x3 + b.gt 1f /* index out of DTV range - slow path */ + + + ldr x3, [x0, x2, lsl #3] /* dtv[tlsdesc->td_tlsindex] */ + cbz x3, 1f + + /* Return (dtv[tlsdesc->td_tlsindex] + tlsdesc->td_tlsoffs - tp) */ + ldr x2, [x1, #16] /* tlsdesc->td_tlsoffs */ + add x2, x2, x3 + sub x0, x2, x4 + + /* Restore registers and return */ + ldp x3, x4, [sp, #(1 * 16)] + ldp x1, x2, [sp], #(2 * 16) + .cfi_adjust_cfa_offset -2 * 16 + ret + + /* + * Slow path + * return _rtld_tls_get_addr(tp, tlsdesc->td_tlsindex, tlsdesc->td_tlsoffs); + * + */ +1: + /* Save all integer registers */ + stp x29, x30, [sp, #-(8 * 16)]!
+ .cfi_adjust_cfa_offset 8 * 16 + .cfi_rel_offset x29, 0 + .cfi_rel_offset x30, 8 + + stp x5, x6, [sp, #(1 * 16)] + stp x7, x8, [sp, #(2 * 16)] + stp x9, x10, [sp, #(3 * 16)] + stp x11, x12, [sp, #(4 * 16)] + stp x13, x14, [sp, #(5 * 16)] + stp x15, x16, [sp, #(6 * 16)] + stp x17, x18, [sp, #(7 * 16)] + .cfi_rel_offset x5, 16 + .cfi_rel_offset x6, 24 + .cfi_rel_offset x7, 32 + .cfi_rel_offset x8, 40 + .cfi_rel_offset x9, 48 + .cfi_rel_offset x10, 56 + .cfi_rel_offset x11, 64 + .cfi_rel_offset x12, 72 + .cfi_rel_offset x13, 80 + .cfi_rel_offset x14, 88 + .cfi_rel_offset x15, 96 + .cfi_rel_offset x16, 104 + .cfi_rel_offset x17, 112 + .cfi_rel_offset x18, 120 /* Find the tls offset */ - ldr x0, [x0, #8] - mov x1, #1 - bl _rtld_tlsdesc_handle + mov x0, x4 /* tp */ + mov x3, x1 /* tlsdesc ptr */ + ldr w1, [x3, #8] /* tlsdesc->td_tlsindex */ + ldr x2, [x3, #16] /* tlsdesc->td_tlsoffs */ + bl _rtld_tls_get_addr + mrs x1, tpidr_el0 + sub x0, x0, x1 - /* Restore the registers */ - ldp x17, x18, [sp, #(9 * 16)] - ldp x15, x16, [sp, #(8 * 16)] - ldp x13, x14, [sp, #(7 * 16)] - ldp x11, x12, [sp, #(6 * 16)] - ldp x9, x10, [sp, #(5 * 16)] - ldp x7, x8, [sp, #(4 * 16)] - ldp x5, x6, [sp, #(3 * 16)] - ldp x3, x4, [sp, #(2 * 16)] - ldp x1, x2, [sp, #(1 * 16)] - ldp x29, x30, [sp], #(10 * 16) + /* Restore slow path registers */ + ldp x17, x18, [sp, #(7 * 16)] + ldp x15, x16, [sp, #(6 * 16)] + ldp x13, x14, [sp, #(5 * 16)] + ldp x11, x12, [sp, #(4 * 16)] + ldp x9, x10, [sp, #(3 * 16)] + ldp x7, x8, [sp, #(2 * 16)] + ldp x5, x6, [sp, #(1 * 16)] + ldp x29, x30, [sp], #(8 * 16) + .cfi_adjust_cfa_offset -8 * 16 + .cfi_restore x29 + .cfi_restore x30 + /* Restore fast path registers and return */ + ldp x3, x4, [sp, #16] + ldp x1, x2, [sp], #(2 * 16) + .cfi_adjust_cfa_offset -2 * 16 + .cfi_endproc ret END(_rtld_tlsdesc_dynamic)