#include <stdio.h>
 /*
  * Sample structure holding one member of each basic type (character,
  * short/int/long integers, single/double precision floats and a pointer).
  * It is used to observe where each member is placed relative to the
  * start of the object.
  */
 struct xuzhina_dump_c05_s3_stru {
     char   a;   /* 1st member: single character */
     short  b;   /* 2nd member: short integer */
     int    c;   /* 3rd member: integer */
     long   d;   /* 4th member: long integer */
     float  e;   /* 5th member: single-precision float */
     double f;   /* 6th member: double-precision float */
     void  *g;   /* 7th member: untyped pointer */
 };
 
 /*
  * Print the address of a struct object followed by the address of each of
  * its seven members, in declaration order, as bare hexadecimal numbers.
  *
  * Passing a pointer to "%x" is a format/argument mismatch, so every address
  * is converted to unsigned long and printed with "%lx". This keeps the
  * output free of a "0x" prefix; "%p" with a (void *) cast is the fully
  * portable alternative, but its output format is implementation-defined.
  *
  * Returns 0.
  */
 int main()
 {
     struct xuzhina_dump_c05_s3_stru test;

     /* Only addresses are taken; the members of test are never read. */
     printf( "addr:%lx, 1st:%lx, 2nd:%lx, 3rd:%lx, "
             "4th:%lx, 5th:%lx, 6th:%lx, 7th:%lx\n",
             (unsigned long)&test,
             (unsigned long)&test.a, (unsigned long)&test.b,
             (unsigned long)&test.c, (unsigned long)&test.d,
             (unsigned long)&test.e, (unsigned long)&test.f,
             (unsigned long)&test.g );
     return 0;
 }

运行结果：

addr:bf909eb4, 1st:bf909eb4, 2nd:bf909eb6, 3rd:bf909eb8, 4th:bf909ebc, 5th:bf909ec0, 6th:bf909ec4, 7th:bf909ecc

可得这样的结论：

1. 结构体对象的地址和第一个成员地址一样。也就是所谓的基地址

2. 第一个成员a和第二个成员b分别是char、short，都不足一个dword大小；为了内存对齐，它们被放进同一个dword里（a在偏移0，b按2字节对齐放在偏移2，中间填充1个字节）。int、long、float、double、指针的大小分别是4、4、4、8、4，都按4字节进行内存对齐，也分别占4、4、4、8、4个字节的空间。

3. 各个成员还是按照结构体的声明顺序由低到高排列。

4. 每个成员的地址都等于基地址加上它前面所有成员的大小（包括内存对齐的填充）。例如test.d的地址就是bf909eb4+0x8 = bf909ebc：它前面的成员a、b共占一个dword，c占一个dword，合计刚好是8个字节。

可能成员类型会由于内存对齐而分辨不出来。其实，用5.1节的方法是可以分辨得出来。有兴趣可以尝试一下。

那么一个结构体如何从汇编里识别出来呢，它究竟有什么特征？

先看一下这个例子。

 /*
  * Three-member sample structure (character, short integer, integer)
  * used by equal() and main() in this example.
  */
 struct xuzhina_dump_c05_s3_stru {
     char  a;   /* 1st member: single character */
     short b;   /* 2nd member: short integer */
     int   c;   /* 3rd member: integer */
 };
 
 /*
  * Member-wise comparison of two structures.
  *
  * The members are compared in declaration order (a, then b, then c) and
  * evaluation stops at the first mismatch.
  *
  * first, second: the two objects to compare; both are dereferenced.
  * Returns true when all three members match, false otherwise.
  */
 bool equal( struct xuzhina_dump_c05_s3_stru* first,
         struct xuzhina_dump_c05_s3_stru* second )
 {
     return first->a == second->a
         && first->b == second->b
         && first->c == second->c;
 }
 
 int main()
 {
     struct xuzhina_dump_c05_s3_stru first = { 'a', 10, 20 };
     struct xuzhina_dump_c05_s3_stru second = {'a', 10, 21 };

     if ( equal( &first, &second ) )
     {
         return 0;
     }
 
     return 1;
 }

汇编代码

(gdb) disassemble equal                                                       
Dump of assembler code for function _Z5equalP24xuzhina_dump_c05_s3_struS0_:
   0x08048470 <+0>:     push   %ebp
   0x08048471 <+1>:     mov    %esp,%ebp
   0x08048473 <+3>:     mov    0x8(%ebp),%eax
   0x08048476 <+6>:     movzbl (%eax),%edx
   0x08048479 <+9>:     mov    0xc(%ebp),%eax
   0x0804847c <+12>:    movzbl (%eax),%eax
   0x0804847f <+15>:    cmp    %al,%dl

   0x08048481 <+17>:    je     0x804848a <_Z5equalP24xuzhina_dump_c05_s3_struS0_+26>

   0x08048483 <+19>:    mov    $0x0,%eax
   0x08048488 <+24>:    jmp    0x80484c0 <_Z5equalP24xuzhina_dump_c05_s3_struS0_+80>

   0x0804848a <+26>:    mov    0x8(%ebp),%eax

   0x0804848d <+29>:    movzwl 0x2(%eax),%edx
   0x08048491 <+33>:    mov    0xc(%ebp),%eax
   0x08048494 <+36>:    movzwl 0x2(%eax),%eax
   0x08048498 <+40>:    cmp    %ax,%dx
   0x0804849b <+43>:    je     0x80484a4 <_Z5equalP24xuzhina_dump_c05_s3_struS0_+52>
   0x0804849d <+45>:    mov    $0x0,%eax
   0x080484a2 <+50>:    jmp    0x80484c0 <_Z5equalP24xuzhina_dump_c05_s3_struS0_+80>

   0x080484a4 <+52>:    mov    0x8(%ebp),%eax

   0x080484a7 <+55>:    mov    0x4(%eax),%edx
   0x080484aa <+58>:    mov    0xc(%ebp),%eax
   0x080484ad <+61>:    mov    0x4(%eax),%eax
   0x080484b0 <+64>:    cmp    %eax,%edx

   0x080484b2 <+66>:    je     0x80484bb <_Z5equalP24xuzhina_dump_c05_s3_struS0_+75>

   0x080484b4 <+68>:    mov    $0x0,%eax
   0x080484b9 <+73>:    jmp    0x80484c0 <_Z5equalP24xuzhina_dump_c05_s3_struS0_+80>

   0x080484bb <+75>:    mov    $0x1,%eax

   0x080484c0 <+80>:    pop    %ebp
   0x080484c1 <+81>:    ret    
End of assembler dump.

(gdb) disassemble main
Dump of assembler code for function main:
   0x080484c2 <+0>:     push   %ebp
   0x080484c3 <+1>:     mov    %esp,%ebp
   0x080484c5 <+3>:     sub    $0x18,%esp

   0x080484c8 <+6>:     movb   $0x61,-0x8(%ebp)
   0x080484cc <+10>:    movw   $0xa,-0x6(%ebp)
   0x080484d2 <+16>:    movl   $0x14,-0x4(%ebp)

   0x080484d9 <+23>:    movb   $0x61,-0x10(%ebp)
   0x080484dd <+27>:    movw   $0xa,-0xe(%ebp)
   0x080484e3 <+33>:    movl   $0x15,-0xc(%ebp)

   0x080484ea <+40>:    lea    -0x10(%ebp),%eax
   0x080484ed <+43>:    mov    %eax,0x4(%esp)
   0x080484f1 <+47>:    lea    -0x8(%ebp),%eax
   0x080484f4 <+50>:    mov    %eax,(%esp)
   0x080484f7 <+53>:    call   0x8048470 <_Z5equalP24xuzhina_dump_c05_s3_struS0_>
   0x080484fc <+58>:    test   %al,%al
   0x080484fe <+60>:    je     0x8048507 <main+69>
   0x08048500 <+62>:    mov    $0x0,%eax
   0x08048505 <+67>:    jmp    0x804850c <main+74>
   0x08048507 <+69>:    mov    $0x1,%eax
   0x0804850c <+74>:    leave  
   0x0804850d <+75>:    ret    
End of assembler dump.

从equal函数的汇编代码可以看到，

1. 结构体都有一个基地址。从它三个成员的寻址可以看到，都是先把基地址放到某个寄存器里，如 mov 0x8(%ebp),%eax。

2. 结构体成员的访问，都在基址基础上加上前面所有成员的大小。如成员c，前面a和b占四个字节，所以就有“mov 0x8(%ebp),%eax”, “mov 0x4(%eax),%edx”.

3. 各个成员与基址的偏移量一般不构成等差数列（本例中的偏移0、2、4恰好等距，只是巧合）；并且各成员的类型、大小可以不同，访问时使用的指令宽度也不同（本例中分别是movzbl、movzwl、mov）。

《coredump问题原理探究》四

《coredump问题原理探究》Linux x86版5.5节C风格数据结构内存布局之基本数据类型构成的结构体

社区

活动

圈层

关于

腾讯云开发者

热门产品

热门推荐

更多推荐