版权声明:本文为博主原创文章,未经博主允许不得转载。 https://cloud.tencent.com/developer/article/1344513
在C语言中,基本的数据类型无非是char, short, int, long, float, double及相应的指针。那么它们在内存里是怎样存放的,在汇编指令里又表现出怎样的特征呢?在这里就分别来探究一下char, short, int, long, float, double的特征。
先用一个例子来看一下char的特征:
#include <stdio.h>
/*
 * Demo program: stores three char locals, prints their addresses, then
 * walks a char pointer over them while modifying two of them, so that the
 * instructions the compiler emits for char and char* can be inspected in gdb.
 * Always returns 0.
 */
int main()
{
/* Three one-byte locals holding 'a' (0x61), 'b' (0x62) and 'c' (0x63). */
char c1 = 'a';
char c2 = 'b';
char c3 = 'c';
/*
 * Print the address of each local.
 * NOTE(review): %x expects an unsigned int but is given a pointer; the
 * matching specifier is %p with a (void *) argument. Left unchanged because
 * the program output quoted in the article was produced by this exact call.
 */
printf( "addresses of c1 to c3 are (%x, %x, %x)\n",
&c1, &c2, &c3 );
/* p starts out pointing at c1. */
char* p = &c1;
/*
 * Parses as *(p++): the pointer is advanced by one char and the value it
 * pointed at is read and discarded, so c1 itself is not modified.
 */
*p++;
/* Re-point p at c2 and add 2 to the char it points to ('b' -> 'd'). */
p = &c2;
(*p) += 2;
/* Re-point p at c3 and add 3 to the char it points to ('c' -> 'f'). */
p = &c3;
(*p) += 3;
/*
 * Print the address of the pointer variable p itself (&p, not the address
 * it holds) and the char it currently points to (c3).
 * The same %x-with-pointer caveat as above applies.
 */
printf( "address and value of p is ( %x, %c )\n", &p, *p );
return 0;
}
编译它:
g++ -o xuzhina_dump_c5_s1 xuzhina_dump_c5_s1.cpp
用gdb打开它,反汇编这个函数:
(gdb) disassemble main
Dump of assembler code for function main:
0x08048570 <+0>: push %ebp
0x08048571 <+1>: mov %esp,%ebp
0x08048573 <+3>: and $0xfffffff0,%esp
0x08048576 <+6>: sub $0x20,%esp
0x08048579 <+9>: movb $0x61,0x1f(%esp)
0x0804857e <+14>: movb $0x62,0x1e(%esp)
0x08048583 <+19>: movb $0x63,0x1d(%esp)
0x08048588 <+24>: lea 0x1d(%esp),%eax
0x0804858c <+28>: mov %eax,0xc(%esp)
0x08048590 <+32>: lea 0x1e(%esp),%eax
0x08048594 <+36>: mov %eax,0x8(%esp)
0x08048598 <+40>: lea 0x1f(%esp),%eax
0x0804859c <+44>: mov %eax,0x4(%esp)
0x080485a0 <+48>: movl $0x80486c4,(%esp)
0x080485a7 <+55>: call 0x8048440 <printf@plt>
0x080485ac <+60>: lea 0x1f(%esp),%eax
0x080485b0 <+64>: mov %eax,0x18(%esp)
0x080485b4 <+68>: mov 0x18(%esp),%eax
0x080485b8 <+72>: add $0x1,%eax
0x080485bb <+75>: mov %eax,0x18(%esp)
0x080485bf <+79>: lea 0x1e(%esp),%eax
0x080485c3 <+83>: mov %eax,0x18(%esp)
0x080485c7 <+87>: mov 0x18(%esp),%eax
0x080485cb <+91>: mov 0x18(%esp),%edx
0x080485cf <+95>: movzbl (%edx),%edx
0x080485d2 <+98>: add $0x2,%edx
0x080485d5 <+101>: mov %dl,(%eax)
0x080485d7 <+103>: lea 0x1d(%esp),%eax
0x080485db <+107>: mov %eax,0x18(%esp)
0x080485df <+111>: mov 0x18(%esp),%eax
0x080485e3 <+115>: mov 0x18(%esp),%edx
0x080485e7 <+119>: movzbl (%edx),%edx
0x080485ea <+122>: add $0x3,%edx
0x080485ed <+125>: mov %dl,(%eax)
0x080485ef <+127>: mov 0x18(%esp),%eax
0x080485f3 <+131>: movzbl (%eax),%eax
0x080485f6 <+134>: movsbl %al,%eax
0x080485f9 <+137>: mov %eax,0x8(%esp)
0x080485fd <+141>: lea 0x18(%esp),%eax
0x08048601 <+145>: mov %eax,0x4(%esp)
0x08048605 <+149>: movl $0x80486ec,(%esp)
0x0804860c <+156>: call 0x8048440 <printf@plt>
0x08048611 <+161>: mov $0x0,%eax
0x08048616 <+166>: jmp 0x8048620 <main+176>
0x08048618 <+168>: mov %eax,(%esp)
0x0804861b <+171>: call 0x8048460 <_Unwind_Resume@plt>
0x08048620 <+176>: leave
0x08048621 <+177>: ret
End of assembler dump.
在第一次 printf的调用后打断点,即:
0x080485ac <+60>: lea 0x1f(%esp),%eax
然后运行一下,看程序的输出:
(gdb) tbreak *0x080485ac
Temporary breakpoint 1 at 0x80485ac
(gdb) r
Starting program: /home/buckxu/work/5/1/xuzhina_dump_c5_s1
addresses of c1 to c3 are (bffff47f, bffff47e, bffff47d)
Temporary breakpoint 1, 0x080485ac in main ()
(gdb) i r ebp esp
ebp 0xbffff488 0xbffff488
esp 0xbffff460 0xbffff460
(gdb) x /16x $esp
0xbffff460: 0x080486c4 0xbffff47f 0xbffff47e 0xbffff47d
0xbffff470: 0x437c43c4 0x43cb75ec 0x0804863b 0x616263f4
0xbffff480: 0x08048630 0x00000000 0x00000000 0x4362f635
0xbffff490: 0x00000001 0xbffff524 0xbffff52c 0xb7ffef18
由上面来看,可以看到c1-c3的地址分别是0xbffff47f, 0xbffff47e, 0xbffff47d,和ebp的值0xbffff488、esp的值0xbffff460相差不远。虽然在第3章已经知道局部变量是存放在栈上的,但单从汇编的角度去理解不太直观,用这个例子会更直观一点。
由于c1-c3的地址是递减的,从上面可以看到’a’, ‘b’, ‘c’在栈里的顺序是
(gdb) x /3c 0xbffff47d
0xbffff47d: 99 'c' 98 'b' 97 'a'
和预期正好相反。但无论怎样,来看一下'a', 'b', 'c'的数值(十六进制的0x61, 0x62, 0x63)是由哪些指令存放到这几个地址的。
0x08048579 <+9>: movb $0x61,0x1f(%esp)
0x0804857e <+14>: movb $0x62,0x1e(%esp)
0x08048583 <+19>: movb $0x63,0x1d(%esp)
从上面可以看到,存放0x61, 0x62, 0x63的那几条指令有一个共同的特征:都是用“movb”。在x86汇编AT&T格式里,“movb”的后缀b说明操作数是一个字节,而一个char型数据正好占一个字节。也就是说,“movb”是操作char型数据的特征指令。
由于0x08048583到0x080485ac之间栈并没有增加空间,所以,它们esp是一样的(运行一下可以证明),都为0xbffff460。那么, 由
0x08048579 <+9>: movb $0x61,0x1f(%esp)
可知esp+0x1f是’a’的地址。
且在x86汇编中,lea指令就是把内存单元的地址放进寄存器里
那么,
0x080485ac <+60>: lea 0x1f(%esp),%eax
0x080485b0 <+64>: mov %eax,0x18(%esp)
就是把’a’的地址存放到esp+0x18里。可以运行一下来看:
(gdb) ni
0x080485b0 in main ()
(gdb) ni
0x080485b4 in main ()
(gdb) x /x $esp+0x18
0xbffff478: 0xbffff47f
也就是说,esp+0x18的内容是指向某一个地址,即是指针p。
再看一下:
0x080485bf <+79>: lea 0x1e(%esp),%eax
0x080485c3 <+83>: mov %eax,0x18(%esp)
0x080485d7 <+103>: lea 0x1d(%esp),%eax
0x080485db <+107>: mov %eax,0x18(%esp)
可以看出,lea指令是用来获取一个内存单元的地址,也就是说,它就是指针的特征指令。
由上面已经得到了char类型及相应指针的特征,现在把上面的程序改一下,来获取一下short的特征。
#include <stdio.h>
/*
 * Demo program: stores three short locals, prints their addresses, then
 * walks a short pointer over them while modifying two of them, so that the
 * instructions the compiler emits for short and short* can be inspected in gdb.
 * Always returns 0.
 */
int main()
{
/* Three short locals initialised from the character codes 0x61, 0x62, 0x63. */
short c1 = 'a';
short c2 = 'b';
short c3 = 'c';
/*
 * Print the address of each local.
 * NOTE(review): %x expects an unsigned int but is given a pointer; the
 * matching specifier is %p with a (void *) argument. Left unchanged so the
 * program stays in step with the disassembly shown in the article.
 */
printf( "addresses of c1 to c3 are (%x, %x, %x)\n",
&c1, &c2, &c3 );
/* p starts out pointing at c1. */
short* p = &c1;
/*
 * Parses as *(p++): the pointer is advanced by one short (the disassembly
 * shows the address growing by 2) and the value it pointed at is read and
 * discarded, so c1 itself is not modified.
 */
*p++;
/* Re-point p at c2 and add 2 to the short it points to (0x62 -> 0x64). */
p = &c2;
(*p) += 2;
/* Re-point p at c3 and add 3 to the short it points to (0x63 -> 0x66). */
p = &c3;
(*p) += 3;
/*
 * Print the address of the pointer variable p itself (&p, not the address
 * it holds) and, as a decimal integer, the short it currently points to (c3).
 * The same %x-with-pointer caveat as above applies.
 */
printf( "address and value of p is ( %x, %d )\n", &p, *p );
return 0;
}
再看汇编
(gdb) disassemble main
Dump of assembler code for function main:
0x08048570 <+0>: push %ebp
0x08048571 <+1>: mov %esp,%ebp
0x08048573 <+3>: and $0xfffffff0,%esp
0x08048576 <+6>: sub $0x20,%esp
0x08048579 <+9>: movw $0x61,0x1e(%esp)
0x08048580 <+16>: movw $0x62,0x1c(%esp)
0x08048587 <+23>: movw $0x63,0x1a(%esp)
0x0804858e <+30>: lea 0x1a(%esp),%eax
0x08048592 <+34>: mov %eax,0xc(%esp)
0x08048596 <+38>: lea 0x1c(%esp),%eax
0x0804859a <+42>: mov %eax,0x8(%esp)
0x0804859e <+46>: lea 0x1e(%esp),%eax
0x080485a2 <+50>: mov %eax,0x4(%esp)
0x080485a6 <+54>: movl $0x80486c4,(%esp)
0x080485ad <+61>: call 0x8048440 <printf@plt>
0x080485b2 <+66>: lea 0x1e(%esp),%eax
0x080485b6 <+70>: mov %eax,0x14(%esp)
0x080485ba <+74>: mov 0x14(%esp),%eax
0x080485be <+78>: add $0x2,%eax
0x080485c1 <+81>: mov %eax,0x14(%esp)
0x080485c5 <+85>: lea 0x1c(%esp),%eax
0x080485c9 <+89>: mov %eax,0x14(%esp)
0x080485cd <+93>: mov 0x14(%esp),%eax
0x080485d1 <+97>: mov 0x14(%esp),%edx
0x080485d5 <+101>: movzwl (%edx),%edx
0x080485d8 <+104>: add $0x2,%edx
0x080485db <+107>: mov %dx,(%eax)
0x080485de <+110>: lea 0x1a(%esp),%eax
0x080485e2 <+114>: mov %eax,0x14(%esp)
0x080485e6 <+118>: mov 0x14(%esp),%eax
0x080485ea <+122>: mov 0x14(%esp),%edx
0x080485ee <+126>: movzwl (%edx),%edx
0x080485f1 <+129>: add $0x3,%edx
0x080485f4 <+132>: mov %dx,(%eax)
0x080485f7 <+135>: mov 0x14(%esp),%eax
0x080485fb <+139>: movzwl (%eax),%eax
0x080485fe <+142>: cwtl
0x080485ff <+143>: mov %eax,0x8(%esp)
0x08048603 <+147>: lea 0x14(%esp),%eax
0x08048607 <+151>: mov %eax,0x4(%esp)
0x0804860b <+155>: movl $0x80486ec,(%esp)
0x08048612 <+162>: call 0x8048440 <printf@plt>
0x08048617 <+167>: mov $0x0,%eax
0x0804861c <+172>: jmp 0x8048626 <main+182>
0x0804861e <+174>: mov %eax,(%esp)
0x08048621 <+177>: call 0x8048460 <_Unwind_Resume@plt>
0x08048626 <+182>: leave
0x08048627 <+183>: ret
End of assembler dump.
上面的汇编和char类型程序的汇编差不多,差别只在原文中标红的部分(标红格式在此处已丢失)。对比两段汇编可以看到,主要是movb变成了movw、movzbl变成了movzwl、%dl变成了%dx、movsbl变成了cwtl:
1. short是两个字节的,所以,movw就是它的特征码。
2. short*指针和char*指针的特征一样。
如果继续对int, long,float, double这些类型进行类似的探究,会得出类似的结论。综合一下,如下表
类型 | 特征 |
---|---|
char | movb |
short | movw |
int | movl |
long | movl(在32-bit Linux下), movq(在64-bit Linux下) |
指针 | lea |