1
0
Fork 0
mirror of synced 2025-03-06 20:59:54 +01:00

tools/nolibc: make compiler and assembler agree on the section around _start

The out-of-block asm() statement carrying _start does not allow the
compiler to know what section the assembly code is being emitted to,
and there's no easy way to push/pop the current section and restore
it. It sometimes causes issues depending on the include files ordering
and compiler optimizations. For example if a variable is declared
immediately before the asm() block and another one after, the compiler
assumes that the current section is still .bss and doesn't re-emit it,
making the second variable appear inside the .text section instead.
Forcing .bss at the end of the _start block doesn't work either because
at certain optimization levels the compiler may reorder blocks, making
some real code appear just after this block.

A significant number of solutions were attempted, but many of them were
still sensitive to section reordering. In the end, the best way to make
sure the compiler and assembler agree on the current section is to place
this code inside a function. Here the function is directly called _start
and configured not to emit a frame-pointer, hence to have no prologue.
If some future architecture still emits a prologue, another working
approach consists in naming the function differently and placing the
_start label inside the asm statement. But the current solution is
simpler.

It was tested with nolibc-test at -O,-O0,-O2,-O3,-Os for arm,arm64,i386,
mips,riscv,s390 and x86_64.

Signed-off-by: Willy Tarreau <w@1wt.eu>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
This commit is contained in:
Willy Tarreau 2023-01-10 08:24:13 +01:00 committed by Paul E. McKenney
parent 28ef4c3753
commit 7f85485896
7 changed files with 133 additions and 131 deletions

View file

@ -182,18 +182,19 @@ struct sys_stat_struct {
}) })
/* startup code */ /* startup code */
__asm__ (".section .text\n" void __attribute__((weak,noreturn,optimize("omit-frame-pointer"))) _start(void)
".weak _start\n" {
"_start:\n" __asm__ volatile (
"ldr x0, [sp]\n" // argc (x0) was in the stack "ldr x0, [sp]\n" // argc (x0) was in the stack
"add x1, sp, 8\n" // argv (x1) = sp "add x1, sp, 8\n" // argv (x1) = sp
"lsl x2, x0, 3\n" // envp (x2) = 8*argc ... "lsl x2, x0, 3\n" // envp (x2) = 8*argc ...
"add x2, x2, 8\n" // + 8 (skip null) "add x2, x2, 8\n" // + 8 (skip null)
"add x2, x2, x1\n" // + argv "add x2, x2, x1\n" // + argv
"and sp, x1, -16\n" // sp must be 16-byte aligned in the callee "and sp, x1, -16\n" // sp must be 16-byte aligned in the callee
"bl main\n" // main() returns the status code, we'll exit with it. "bl main\n" // main() returns the status code, we'll exit with it.
"mov x8, 93\n" // NR_exit == 93 "mov x8, 93\n" // NR_exit == 93
"svc #0\n" "svc #0\n"
""); );
__builtin_unreachable();
}
#endif // _NOLIBC_ARCH_AARCH64_H #endif // _NOLIBC_ARCH_AARCH64_H

View file

@ -175,30 +175,20 @@ struct sys_stat_struct {
}) })
/* startup code */ /* startup code */
__asm__ (".section .text\n" void __attribute__((weak,noreturn,optimize("omit-frame-pointer"))) _start(void)
".weak _start\n" {
"_start:\n" __asm__ volatile (
#if defined(__THUMBEB__) || defined(__THUMBEL__) "pop {%r0}\n" // argc was in the stack
/* We enter here in 32-bit mode but if some previous functions were in "mov %r1, %sp\n" // argv = sp
* 16-bit mode, the assembler cannot know, so we need to tell it we're in "add %r2, %r1, %r0, lsl #2\n" // envp = argv + 4*argc ...
* 32-bit now, then switch to 16-bit (is there a better way to do it than "add %r2, %r2, $4\n" // ... + 4
* adding 1 by hand ?) and tell the asm we're now in 16-bit mode so that "and %r3, %r1, $-8\n" // AAPCS : sp must be 8-byte aligned in the
* it generates correct instructions. Note that we do not support thumb1. "mov %sp, %r3\n" // callee, an bl doesn't push (lr=pc)
*/ "bl main\n" // main() returns the status code, we'll exit with it.
".code 32\n" "movs r7, $1\n" // NR_exit == 1
"add r0, pc, #1\n" "svc $0x00\n"
"bx r0\n" );
".code 16\n" __builtin_unreachable();
#endif }
"pop {%r0}\n" // argc was in the stack
"mov %r1, %sp\n" // argv = sp
"add %r2, %r1, %r0, lsl #2\n" // envp = argv + 4*argc ...
"add %r2, %r2, $4\n" // ... + 4
"and %r3, %r1, $-8\n" // AAPCS : sp must be 8-byte aligned in the
"mov %sp, %r3\n" // callee, an bl doesn't push (lr=pc)
"bl main\n" // main() returns the status code, we'll exit with it.
"movs r7, $1\n" // NR_exit == 1
"svc $0x00\n"
"");
#endif // _NOLIBC_ARCH_ARM_H #endif // _NOLIBC_ARCH_ARM_H

View file

@ -197,23 +197,25 @@ struct sys_stat_struct {
* 2) The deepest stack frame should be set to zero * 2) The deepest stack frame should be set to zero
* *
*/ */
__asm__ (".section .text\n" void __attribute__((weak,noreturn,optimize("omit-frame-pointer"))) _start(void)
".weak _start\n" {
"_start:\n" __asm__ volatile (
"pop %eax\n" // argc (first arg, %eax) "pop %eax\n" // argc (first arg, %eax)
"mov %esp, %ebx\n" // argv[] (second arg, %ebx) "mov %esp, %ebx\n" // argv[] (second arg, %ebx)
"lea 4(%ebx,%eax,4),%ecx\n" // then a NULL then envp (third arg, %ecx) "lea 4(%ebx,%eax,4),%ecx\n" // then a NULL then envp (third arg, %ecx)
"xor %ebp, %ebp\n" // zero the stack frame "xor %ebp, %ebp\n" // zero the stack frame
"and $-16, %esp\n" // x86 ABI : esp must be 16-byte aligned before "and $-16, %esp\n" // x86 ABI : esp must be 16-byte aligned before
"sub $4, %esp\n" // the call instruction (args are aligned) "sub $4, %esp\n" // the call instruction (args are aligned)
"push %ecx\n" // push all registers on the stack so that we "push %ecx\n" // push all registers on the stack so that we
"push %ebx\n" // support both regparm and plain stack modes "push %ebx\n" // support both regparm and plain stack modes
"push %eax\n" "push %eax\n"
"call main\n" // main() returns the status code in %eax "call main\n" // main() returns the status code in %eax
"mov %eax, %ebx\n" // retrieve exit code (32-bit int) "mov %eax, %ebx\n" // retrieve exit code (32-bit int)
"movl $1, %eax\n" // NR_exit == 1 "movl $1, %eax\n" // NR_exit == 1
"int $0x80\n" // exit now "int $0x80\n" // exit now
"hlt\n" // ensure it does not "hlt\n" // ensure it does not
""); );
__builtin_unreachable();
}
#endif // _NOLIBC_ARCH_I386_H #endif // _NOLIBC_ARCH_I386_H

View file

@ -189,29 +189,32 @@ struct sys_stat_struct {
}) })
/* startup code, note that it's called __start on MIPS */ /* startup code, note that it's called __start on MIPS */
__asm__ (".section .text\n" void __attribute__((weak,noreturn,optimize("omit-frame-pointer"))) __start(void)
".weak __start\n" {
".set nomips16\n" __asm__ volatile (
".set push\n" //".set nomips16\n"
".set noreorder\n" ".set push\n"
".option pic0\n" ".set noreorder\n"
".ent __start\n" ".option pic0\n"
"__start:\n" //".ent __start\n"
"lw $a0,($sp)\n" // argc was in the stack //"__start:\n"
"addiu $a1, $sp, 4\n" // argv = sp + 4 "lw $a0,($sp)\n" // argc was in the stack
"sll $a2, $a0, 2\n" // a2 = argc * 4 "addiu $a1, $sp, 4\n" // argv = sp + 4
"add $a2, $a2, $a1\n" // envp = argv + 4*argc ... "sll $a2, $a0, 2\n" // a2 = argc * 4
"addiu $a2, $a2, 4\n" // ... + 4 "add $a2, $a2, $a1\n" // envp = argv + 4*argc ...
"li $t0, -8\n" "addiu $a2, $a2, 4\n" // ... + 4
"and $sp, $sp, $t0\n" // sp must be 8-byte aligned "li $t0, -8\n"
"addiu $sp,$sp,-16\n" // the callee expects to save a0..a3 there! "and $sp, $sp, $t0\n" // sp must be 8-byte aligned
"jal main\n" // main() returns the status code, we'll exit with it. "addiu $sp,$sp,-16\n" // the callee expects to save a0..a3 there!
"nop\n" // delayed slot "jal main\n" // main() returns the status code, we'll exit with it.
"move $a0, $v0\n" // retrieve 32-bit exit code from v0 "nop\n" // delayed slot
"li $v0, 4001\n" // NR_exit == 4001 "move $a0, $v0\n" // retrieve 32-bit exit code from v0
"syscall\n" "li $v0, 4001\n" // NR_exit == 4001
".end __start\n" "syscall\n"
".set pop\n" //".end __start\n"
""); ".set pop\n"
);
__builtin_unreachable();
}
#endif // _NOLIBC_ARCH_MIPS_H #endif // _NOLIBC_ARCH_MIPS_H

View file

@ -183,22 +183,24 @@ struct sys_stat_struct {
}) })
/* startup code */ /* startup code */
__asm__ (".section .text\n" void __attribute__((weak,noreturn,optimize("omit-frame-pointer"))) _start(void)
".weak _start\n" {
"_start:\n" __asm__ volatile (
".option push\n" ".option push\n"
".option norelax\n" ".option norelax\n"
"lla gp, __global_pointer$\n" "lla gp, __global_pointer$\n"
".option pop\n" ".option pop\n"
"lw a0, 0(sp)\n" // argc (a0) was in the stack "lw a0, 0(sp)\n" // argc (a0) was in the stack
"add a1, sp, "SZREG"\n" // argv (a1) = sp "add a1, sp, "SZREG"\n" // argv (a1) = sp
"slli a2, a0, "PTRLOG"\n" // envp (a2) = SZREG*argc ... "slli a2, a0, "PTRLOG"\n" // envp (a2) = SZREG*argc ...
"add a2, a2, "SZREG"\n" // + SZREG (skip null) "add a2, a2, "SZREG"\n" // + SZREG (skip null)
"add a2,a2,a1\n" // + argv "add a2,a2,a1\n" // + argv
"andi sp,a1,-16\n" // sp must be 16-byte aligned "andi sp,a1,-16\n" // sp must be 16-byte aligned
"call main\n" // main() returns the status code, we'll exit with it. "call main\n" // main() returns the status code, we'll exit with it.
"li a7, 93\n" // NR_exit == 93 "li a7, 93\n" // NR_exit == 93
"ecall\n" "ecall\n"
""); );
__builtin_unreachable();
}
#endif // _NOLIBC_ARCH_RISCV_H #endif // _NOLIBC_ARCH_RISCV_H

View file

@ -172,27 +172,29 @@ struct sys_stat_struct {
}) })
/* startup code */ /* startup code */
__asm__ (".section .text\n" void __attribute__((weak,noreturn,optimize("omit-frame-pointer"))) _start(void)
".weak _start\n" {
"_start:\n" __asm__ volatile (
"lg %r2,0(%r15)\n" /* argument count */ "lg %r2,0(%r15)\n" /* argument count */
"la %r3,8(%r15)\n" /* argument pointers */ "la %r3,8(%r15)\n" /* argument pointers */
"xgr %r0,%r0\n" /* r0 will be our NULL value */ "xgr %r0,%r0\n" /* r0 will be our NULL value */
/* search for envp */ /* search for envp */
"lgr %r4,%r3\n" /* start at argv */ "lgr %r4,%r3\n" /* start at argv */
"0:\n" "0:\n"
"clg %r0,0(%r4)\n" /* entry zero? */ "clg %r0,0(%r4)\n" /* entry zero? */
"la %r4,8(%r4)\n" /* advance pointer */ "la %r4,8(%r4)\n" /* advance pointer */
"jnz 0b\n" /* no -> test next pointer */ "jnz 0b\n" /* no -> test next pointer */
/* yes -> r4 now contains start of envp */ /* yes -> r4 now contains start of envp */
"aghi %r15,-160\n" /* allocate new stackframe */ "aghi %r15,-160\n" /* allocate new stackframe */
"xc 0(8,%r15),0(%r15)\n" /* clear backchain */ "xc 0(8,%r15),0(%r15)\n" /* clear backchain */
"brasl %r14,main\n" /* ret value of main is arg to exit */ "brasl %r14,main\n" /* ret value of main is arg to exit */
"lghi %r1,1\n" /* __NR_exit */ "lghi %r1,1\n" /* __NR_exit */
"svc 0\n" "svc 0\n"
""); );
__builtin_unreachable();
}
struct s390_mmap_arg_struct { struct s390_mmap_arg_struct {
unsigned long addr; unsigned long addr;

View file

@ -197,19 +197,21 @@ struct sys_stat_struct {
* 2) The deepest stack frame should be zero (the %rbp). * 2) The deepest stack frame should be zero (the %rbp).
* *
*/ */
__asm__ (".section .text\n" void __attribute__((weak,noreturn,optimize("omit-frame-pointer"))) _start(void)
".weak _start\n" {
"_start:\n" __asm__ volatile (
"pop %rdi\n" // argc (first arg, %rdi) "pop %rdi\n" // argc (first arg, %rdi)
"mov %rsp, %rsi\n" // argv[] (second arg, %rsi) "mov %rsp, %rsi\n" // argv[] (second arg, %rsi)
"lea 8(%rsi,%rdi,8),%rdx\n" // then a NULL then envp (third arg, %rdx) "lea 8(%rsi,%rdi,8),%rdx\n" // then a NULL then envp (third arg, %rdx)
"xor %ebp, %ebp\n" // zero the stack frame "xor %ebp, %ebp\n" // zero the stack frame
"and $-16, %rsp\n" // x86 ABI : esp must be 16-byte aligned before call "and $-16, %rsp\n" // x86 ABI : esp must be 16-byte aligned before call
"call main\n" // main() returns the status code, we'll exit with it. "call main\n" // main() returns the status code, we'll exit with it.
"mov %eax, %edi\n" // retrieve exit code (32 bit) "mov %eax, %edi\n" // retrieve exit code (32 bit)
"mov $60, %eax\n" // NR_exit == 60 "mov $60, %eax\n" // NR_exit == 60
"syscall\n" // really exit "syscall\n" // really exit
"hlt\n" // ensure it does not return "hlt\n" // ensure it does not return
""); );
__builtin_unreachable();
}
#endif // _NOLIBC_ARCH_X86_64_H #endif // _NOLIBC_ARCH_X86_64_H