由于linux下的程序一般都是elf格式,所以入口函数的地址通常存放在elf header的e_entry字段中(可以用 readelf -h 查看 Entry point address),该地址对应的就是下面的_start函数。



// sysdeps/x86_64/start.S
/* This is the canonical entry point, usually the first thing in the text
   segment.  The SVR4/i386 ABI (pages 3-31, 3-32) says that when the entry
   point runs, most registers' values are unspecified, except for:

   %rdx    Contains a function pointer to be registered with `atexit'.
    This is how the dynamic linker arranges to have DT_FINI
    functions called for shared libraries that have been loaded
    before this code runs.

   %rsp    The stack contains the arguments and environment:
    0(%rsp)        argc
    LP_SIZE(%rsp)      argv[0]
    ...
    (LP_SIZE*argc)(%rsp)    NULL
    (LP_SIZE*(argc+1))(%rsp)  envp[0]
    ...
              NULL
*/

#include <sysdep.h>

/* _start: process entry point (GAS, AT&T syntax, run through the C
   preprocessor).

   On entry (see the register/stack description in the file header):
     %rdx     function pointer to be registered with atexit (rtld_fini)
     (%rsp)   argc, followed by argv[], NULL, envp[], NULL

   The code marshals these into the seven arguments of
   __libc_start_main and calls it.  It never returns; the trailing
   `hlt' traps if control ever comes back.

   The *_LP register names and the ENTRY/END/cfi_undefined macros are
   expected to come from <sysdep.h>.  */
ENTRY (_start)
  /* Clearing frame pointer is insufficient, use CFI.  */
  cfi_undefined (rip)
  /* Clear the frame pointer.  The ABI suggests this be done, to mark
     the outermost frame obviously.  */
  xorl %ebp, %ebp

  /* Extract the arguments as encoded on the stack and set up
     the arguments for __libc_start_main (int (*main) (int, char **, char **),
       int argc, char **argv,
       void (*init) (void), void (*fini) (void),
       void (*rtld_fini) (void), void *stack_end).
     The arguments are passed via registers and on the stack:
  main:    %rdi
  argc:    %rsi
  argv:    %rdx
  init:    %rcx
  fini:    %r8
  rtld_fini:  %r9
  stack_end:  stack.  */

  /* Save rtld_fini first: %rdx is reused for argv below.  */
  mov %RDX_LP, %R9_LP  /* Address of the shared library termination
           function.  */
#ifdef __ILP32__
  mov (%rsp), %esi  /* Simulate popping 4-byte argument count.  */
  add $4, %esp
#else
  popq %rsi    /* Pop the argument count.  */
#endif
  /* argv starts just at the current stack top.  */
  mov %RSP_LP, %RDX_LP
  /* Align the stack to a 16 byte boundary to follow the ABI.  */
  and  $~15, %RSP_LP

  /* Push garbage because we push 8 more bytes.  Together with the
     push of %rsp below this is 16 bytes, so the stack stays 16-byte
     aligned at the call.  */
  pushq %rax

  /* Provide the highest stack address to the user code (for stacks
     which grow downwards).  This is the stack_end argument, the only
     one passed on the stack.  */
  pushq %rsp

#ifdef PIC
  /* Pass address of our own entry points to .fini and .init.
     Position-independent build: load the addresses from the GOT.  */
  mov __libc_csu_fini@GOTPCREL(%rip), %R8_LP
  mov __libc_csu_init@GOTPCREL(%rip), %RCX_LP

  mov main@GOTPCREL(%rip), %RDI_LP
#else
  /* Pass address of our own entry points to .fini and .init.
     Non-PIC build: the addresses are used as absolute immediates.  */
  mov $__libc_csu_fini, %R8_LP
  mov $__libc_csu_init, %RCX_LP

  mov $main, %RDI_LP
#endif

  /* Call the user's main function, and exit with its value.
     But let the libc call main.  Since __libc_start_main in
     libc.so is called very early, lazy binding isn't relevant
     here.  Use indirect branch via GOT to avoid extra branch
     to PLT slot.  In case of static executable, ld in binutils
     2.26 or above can convert indirect branch into direct
     branch.  */
  call *__libc_start_main@GOTPCREL(%rip)

  hlt      /* Crash if somehow `exit' does return.   */
END (_start)



该段汇编代码的注释已经把其作用讲得很清楚了,大意就是按照c语言的calling convention,先把__libc_start_main函数所需的参数放入到对应的寄存器或栈中(前六个参数main、argc、argv、init、fini、rtld_fini依次放入%rdi、%rsi、%rdx、%rcx、%r8、%r9,第七个参数stack_end压入栈中),再调用__libc_start_main函数。



// csu/libc-start.c# define LIBC_START_MAIN __libc_start_main.../* Note: the fini parameter is ignored here for shared library.  It   is registered with __cxa_atexit.  This had the disadvantage that   finalizers were called in more than one place.  */STATIC intLIBC_START_MAIN (int (*main) (int, char **, char ** MAIN_AUXVEC_DECL),                 int argc, char **argv,                 ...                 __typeof (main) init,                 void (*fini) (void),                 void (*rtld_fini) (void), void *stack_end){  /* Result of the 'main' function.  */  int result;  ...  /* Nothing fancy, just call the function.  */  result = main (argc, argv, __environ MAIN_AUXVEC_PARAM);  ...  exit (result);}

上面就是对应的__libc_start_main函数,由上可见,该函数的参数及其顺序和前面的_start函数里按照c语言的calling convention准备的参数及顺序是一致的。





原文发布于微信公众号 - Linux内核及JVM底层相关技术研究(ytcode)





0 条评论
登录 后参与评论