版权声明:本文为博主原创文章,未经博主允许不得转载。 https://cloud.tencent.com/developer/article/1344515
在了解了上面的规律后,现在开始来解决本章一开头的问题:
(gdb) bt
#0 0x4365b569 in vfprintf () from /lib/libc.so.6
#1 0x436629ff in printf () from /lib/libc.so.6
#2 0x080485b9 in main ()
看一下main函数的汇编:
(gdb) disassemble main
Dump of assembler code for function main:
0x08048500 <+0>: push %ebp
0x08048501 <+1>: mov %esp,%ebp
0x08048503 <+3>: and $0xfffffff0,%esp
0x08048506 <+6>: sub $0x20,%esp
0x08048509 <+9>: movl $0x0,0x1c(%esp)
0x08048511 <+17>: jmp 0x80485bf <main+191>
0x08048516 <+22>: mov 0x1c(%esp),%eax
0x0804851a <+26>: lea 0x0(,%eax,4),%edx
0x08048521 <+33>: mov 0xc(%ebp),%eax
0x08048524 <+36>: add %edx,%eax
0x08048526 <+38>: mov (%eax),%eax
0x08048528 <+40>: mov %eax,(%esp)
0x0804852b <+43>: call 0x80483d0 <strlen@plt>
0x08048530 <+48>: mov %eax,0x18(%esp)
0x08048534 <+52>: mov 0x18(%esp),%eax
0x08048538 <+56>: cmp $0x1,%eax
0x0804853b <+59>: je 0x8048570 <main+112>
0x0804853d <+61>: cmp $0x2,%eax
0x08048540 <+64>: je 0x804858f <main+143>
0x08048542 <+66>: test %eax,%eax
0x08048544 <+68>: jne 0x80485a5 <main+165>
0x08048546 <+70>: mov 0x1c(%esp),%eax
0x0804854a <+74>: lea 0x0(,%eax,4),%edx
0x08048551 <+81>: mov 0xc(%ebp),%eax
0x08048554 <+84>: add %edx,%eax
0x08048556 <+86>: mov (%eax),%eax
0x08048558 <+88>: movzbl (%eax),%eax
0x0804855b <+91>: movsbl %al,%eax
0x0804855e <+94>: mov %eax,0x4(%esp)
0x08048562 <+98>: movl $0x8048674,(%esp)
0x08048569 <+105>: call 0x80483e0 <printf@plt>
0x0804856e <+110>: jmp 0x80485ba <main+186>
0x08048570 <+112>: mov 0x1c(%esp),%eax
0x08048574 <+116>: add $0x1,%eax
0x08048577 <+119>: lea 0x0(,%eax,4),%edx
0x0804857e <+126>: mov 0xc(%ebp),%eax
0x08048581 <+129>: add %edx,%eax
0x08048583 <+131>: mov (%eax),%eax
0x08048585 <+133>: mov %eax,(%esp)
0x08048588 <+136>: call 0x80483f0 <puts@plt>
0x0804858d <+141>: jmp 0x80485ba <main+186>
0x0804858f <+143>: mov 0x1c(%esp),%eax
0x08048593 <+147>: mov %eax,0x4(%esp)
0x08048597 <+151>: movl $0x8048678,(%esp)
0x0804859e <+158>: call 0x80483e0 <printf@plt>
0x080485a3 <+163>: jmp 0x80485ba <main+186>
0x080485a5 <+165>: mov 0x1c(%esp),%eax
0x080485a9 <+169>: mov %eax,0x4(%esp)
0x080485ad <+173>: movl $0x804867c,(%esp)
0x080485b4 <+180>: call 0x80483e0 <printf@plt>
0x080485b9 <+185>: nop
0x080485ba <+186>: addl $0x1,0x1c(%esp)
0x080485bf <+191>: mov 0x1c(%esp),%eax
0x080485c3 <+195>: cmp 0x8(%ebp),%eax
0x080485c6 <+198>: setl %al
0x080485c9 <+201>: test %al,%al
0x080485cb <+203>: jne 0x8048516 <main+22>
0x080485d1 <+209>: mov $0x0,%eax
0x080485d6 <+214>: leave
0x080485d7 <+215>: ret
End of assembler dump.
由
0x080485cb <+203>: jne 0x8048516 <main+22>
可知,0x8048516到0x080485cb构成一个循环。
而0x080485cb的判断条件:
0x080485bf <+191>: mov 0x1c(%esp),%eax
0x080485c3 <+195>: cmp 0x8(%ebp),%eax
0x080485c6 <+198>: setl %al
0x080485c9 <+201>: test %al,%al
里,提到了ebp+8。由于main函数原型的第一个参数是argc(这在第三章“函数参数”中有提及),所以上面的语句是判断esp+0x1c的值是否小于argc(setl在“小于”时把al置1):如果小于,就继续循环,否则跳出循环。假定esp+0x1c这个变量命名为cnt。
又由
0x080485ba <+186>: addl $0x1,0x1c(%esp)
可翻译成 cnt++,可知,
cnt一开始应该是小于argc的,每次递增之后都要再判断一次。
又由
0x08048509 <+9>: movl $0x0,0x1c(%esp)
0x08048511 <+17>: jmp 0x80485bf <main+191>
可知,cnt的初始值为0,且一初始化之后就跳转到0x080485bf和argc比较。
而又由
0x080485d1 <+209>: mov $0x0,%eax
0x080485d6 <+214>: leave
0x080485d7 <+215>: ret
结合第三章“返回值”那一节可知,main函数无论什么情况都返回0。
所以,整个main函数可以翻译成这样:
int main( int argc, char* argv[] )
{
int cnt = 0;
while ( cnt < argc )
{
0x08048516 <+22>: mov 0x1c(%esp),%eax
0x0804851a <+26>: lea 0x0(,%eax,4),%edx
0x08048521 <+33>: mov 0xc(%ebp),%eax
0x08048524 <+36>: add %edx,%eax
0x08048526 <+38>: mov (%eax),%eax
0x08048528 <+40>: mov %eax,(%esp)
0x0804852b <+43>: call 0x80483d0 <strlen@plt>
0x08048530 <+48>: mov %eax,0x18(%esp)
0x08048534 <+52>: mov 0x18(%esp),%eax
0x08048538 <+56>: cmp $0x1,%eax
0x0804853b <+59>: je 0x8048570 <main+112>
0x0804853d <+61>: cmp $0x2,%eax
0x08048540 <+64>: je 0x804858f <main+143>
0x08048542 <+66>: test %eax,%eax
0x08048544 <+68>: jne 0x80485a5 <main+165>
0x08048546 <+70>: mov 0x1c(%esp),%eax
0x0804854a <+74>: lea 0x0(,%eax,4),%edx
0x08048551 <+81>: mov 0xc(%ebp),%eax
0x08048554 <+84>: add %edx,%eax
0x08048556 <+86>: mov (%eax),%eax
0x08048558 <+88>: movzbl (%eax),%eax
0x0804855b <+91>: movsbl %al,%eax
0x0804855e <+94>: mov %eax,0x4(%esp)
0x08048562 <+98>: movl $0x8048674,(%esp)
0x08048569 <+105>: call 0x80483e0 <printf@plt>
0x0804856e <+110>: jmp 0x80485ba <main+186>
0x08048570 <+112>: mov 0x1c(%esp),%eax
0x08048574 <+116>: add $0x1,%eax
0x08048577 <+119>: lea 0x0(,%eax,4),%edx
0x0804857e <+126>: mov 0xc(%ebp),%eax
0x08048581 <+129>: add %edx,%eax
0x08048583 <+131>: mov (%eax),%eax
0x08048585 <+133>: mov %eax,(%esp)
0x08048588 <+136>: call 0x80483f0 <puts@plt>
0x0804858d <+141>: jmp 0x80485ba <main+186>
0x0804858f <+143>: mov 0x1c(%esp),%eax
0x08048593 <+147>: mov %eax,0x4(%esp)
0x08048597 <+151>: movl $0x8048678,(%esp)
0x0804859e <+158>: call 0x80483e0 <printf@plt>
0x080485a3 <+163>: jmp 0x80485ba <main+186>
0x080485a5 <+165>: mov 0x1c(%esp),%eax
0x080485a9 <+169>: mov %eax,0x4(%esp)
0x080485ad <+173>: movl $0x804867c,(%esp)
0x080485b4 <+180>: call 0x80483e0 <printf@plt>
0x080485b9 <+185>: nop
cnt++;
}
return 0;
}
分析一下上面汇编块的跳转语句
由
0x08048538 <+56>: cmp $0x1,%eax
0x0804853b <+59>: je 0x8048570 <main+112>
可知,
0x08048570 <+112>: mov 0x1c(%esp),%eax
0x08048574 <+116>: add $0x1,%eax
0x08048577 <+119>: lea 0x0(,%eax,4),%edx
0x0804857e <+126>: mov 0xc(%ebp),%eax
0x08048581 <+129>: add %edx,%eax
0x08048583 <+131>: mov (%eax),%eax
0x08048585 <+133>: mov %eax,(%esp)
0x08048588 <+136>: call 0x80483f0 <puts@plt>
0x0804858d <+141>: jmp 0x80485ba <main+186>
是在eax等于1的情况下的代码块。
同样分析
0x0804853d <+61>: cmp $0x2,%eax
0x08048540 <+64>: je 0x804858f <main+143>
0x08048542 <+66>: test %eax,%eax
0x08048544 <+68>: jne 0x80485a5 <main+165>
可知
0x0804858f <+143>: mov 0x1c(%esp),%eax
0x08048593 <+147>: mov %eax,0x4(%esp)
0x08048597 <+151>: movl $0x8048678,(%esp)
0x0804859e <+158>: call 0x80483e0 <printf@plt>
0x080485a3 <+163>: jmp 0x80485ba <main+186>
是在eax为2的情况下的代码块。
0x08048546 <+70>: mov 0x1c(%esp),%eax
0x0804854a <+74>: lea 0x0(,%eax,4),%edx
0x08048551 <+81>: mov 0xc(%ebp),%eax
0x08048554 <+84>: add %edx,%eax
0x08048556 <+86>: mov (%eax),%eax
0x08048558 <+88>: movzbl (%eax),%eax
0x0804855b <+91>: movsbl %al,%eax
0x0804855e <+94>: mov %eax,0x4(%esp)
0x08048562 <+98>: movl $0x8048674,(%esp)
0x08048569 <+105>: call 0x80483e0 <printf@plt>
0x0804856e <+110>: jmp 0x80485ba <main+186>
是在eax为0的情况下的代码块。
0x080485a5 <+165>: mov 0x1c(%esp),%eax
0x080485a9 <+169>: mov %eax,0x4(%esp)
0x080485ad <+173>: movl $0x804867c,(%esp)
0x080485b4 <+180>: call 0x80483e0 <printf@plt>
0x080485b9 <+185>: nop
是在eax不为0,1,2这三种情况下的代码块。由于这几个判断都是特定的整数,所以最好用switch结构来还原。
而对于
0x0804856e <+110>: jmp 0x80485ba <main+186>
这条指令所跳转的地方,刚好是
0x080485ba <+186>: addl $0x1,0x1c(%esp)
即cnt++;
而eax的值则是由
0x0804852b <+43>: call 0x80483d0 <strlen@plt>
0x08048530 <+48>: mov %eax,0x18(%esp)
0x08048534 <+52>: mov 0x18(%esp),%eax
得来的,根据第三章“返回值”那一节,可以知道eax应该是strlen函数的返回值,命名为len。
而在
0x08048516 <+22>: mov 0x1c(%esp),%eax
0x0804851a <+26>: lea 0x0(,%eax,4),%edx
0x08048521 <+33>: mov 0xc(%ebp),%eax
0x08048524 <+36>: add %edx,%eax
0x08048526 <+38>: mov (%eax),%eax
0x08048528 <+40>: mov %eax,(%esp)
由于esp+0x1c已经命名为cnt了,ebp+0xc为main函数的第二个参数argv,那么这一段汇编的意思是取argv[cnt]的值,并把它放到栈顶(esp所指的位置),作为接下来strlen调用的参数。
所以main函数又会变成这样:
int main( int argc, char* argv[] )
{
int cnt = 0;
while ( cnt < argc )
{
size_t len = strlen( argv[cnt] );
switch ( len )
{
case 0:
{
0x08048546 <+70>: mov 0x1c(%esp),%eax
0x0804854a <+74>: lea 0x0(,%eax,4),%edx
0x08048551 <+81>: mov 0xc(%ebp),%eax
0x08048554 <+84>: add %edx,%eax
0x08048556 <+86>: mov (%eax),%eax
0x08048558 <+88>: movzbl (%eax),%eax
0x0804855b <+91>: movsbl %al,%eax
0x0804855e <+94>: mov %eax,0x4(%esp)
0x08048562 <+98>: movl $0x8048674,(%esp)
0x08048569 <+105>: call 0x80483e0 <printf@plt>
break;
}
case 1:
{
0x08048570 <+112>: mov 0x1c(%esp),%eax
0x08048574 <+116>: add $0x1,%eax
0x08048577 <+119>: lea 0x0(,%eax,4),%edx
0x0804857e <+126>: mov 0xc(%ebp),%eax
0x08048581 <+129>: add %edx,%eax
0x08048583 <+131>: mov (%eax),%eax
0x08048585 <+133>: mov %eax,(%esp)
0x08048588 <+136>: call 0x80483f0 <puts@plt>
break;
}
case 2:
{
0x0804858f <+143>: mov 0x1c(%esp),%eax
0x08048593 <+147>: mov %eax,0x4(%esp)
0x08048597 <+151>: movl $0x8048678,(%esp)
0x0804859e <+158>: call 0x80483e0 <printf@plt>
break;
}
default:
{
0x080485a5 <+165>: mov 0x1c(%esp),%eax
0x080485a9 <+169>: mov %eax,0x4(%esp)
0x080485ad <+173>: movl $0x804867c,(%esp)
0x080485b4 <+180>: call 0x80483e0 <printf@plt>
0x080485b9 <+185>: nop
break;
}
}
cnt++;
}
return 0;
}
看一下case 0情况的汇编:
0x08048546 <+70>: mov 0x1c(%esp),%eax
0x0804854a <+74>: lea 0x0(,%eax,4),%edx
0x08048551 <+81>: mov 0xc(%ebp),%eax
0x08048554 <+84>: add %edx,%eax
0x08048556 <+86>: mov (%eax),%eax
0x08048558 <+88>: movzbl (%eax),%eax
0x0804855b <+91>: movsbl %al,%eax
0x0804855e <+94>: mov %eax,0x4(%esp)
0x08048562 <+98>: movl $0x8048674,(%esp)
0x08048569 <+105>: call 0x80483e0 <printf@plt>
由于printf的第一个参数是格式字符串,那么看一下0x8048674存放着什么内容:
(gdb) x /s 0x8048674
0x8048674 <__dso_handle+4>: "%c\n"
而
0x08048546 <+70>: mov 0x1c(%esp),%eax
0x0804854a <+74>: lea 0x0(,%eax,4),%edx
0x08048551 <+81>: mov 0xc(%ebp),%eax
0x08048554 <+84>: add %edx,%eax
在分析strlen时已经知道是计算argv[cnt]的地址(即&argv[cnt]),所以,
0x08048556 <+86>: mov (%eax),%eax
0x08048558 <+88>: movzbl (%eax),%eax
0x0804855b <+91>: movsbl %al,%eax
就是先取出argv[cnt],再取它的第一个字节并做符号扩展,即取argv[cnt][0]的值。
那么,case 0的汇编可翻译成
printf( "%c\n", argv[cnt][0] );
也就是说,main函数可以变成这样:
int main( int argc, char* argv[] )
{
int cnt = 0;
while ( cnt < argc )
{
size_t len = strlen( argv[cnt] );
switch ( len )
{
case 0:
{
printf( “%c\n”, argv[cnt][0] );
break;
}
case 1:
{
0x08048570 <+112>: mov 0x1c(%esp),%eax
0x08048574 <+116>: add $0x1,%eax
0x08048577 <+119>: lea 0x0(,%eax,4),%edx
0x0804857e <+126>: mov 0xc(%ebp),%eax
0x08048581 <+129>: add %edx,%eax
0x08048583 <+131>: mov (%eax),%eax
0x08048585 <+133>: mov %eax,(%esp)
0x08048588 <+136>: call 0x80483f0 <puts@plt>
break;
}
case 2:
{
0x0804858f <+143>: mov 0x1c(%esp),%eax
0x08048593 <+147>: mov %eax,0x4(%esp)
0x08048597 <+151>: movl $0x8048678,(%esp)
0x0804859e <+158>: call 0x80483e0 <printf@plt>
break;
}
default:
{
0x080485a5 <+165>: mov 0x1c(%esp),%eax
0x080485a9 <+169>: mov %eax,0x4(%esp)
0x080485ad <+173>: movl $0x804867c,(%esp)
0x080485b4 <+180>: call 0x80483e0 <printf@plt>
0x080485b9 <+185>: nop
break;
}
}
cnt++;
}
return 0;
}
按照case 0的情况可以分析出其它几个如下:
case 1:
{
puts( argv[cnt+1] );
break;
}
case 2:
{
printf( “%d\n”, cnt );
break;
}
default:
{
printf( “%s\n”, cnt );
break;
}
即整个main函数如下:
/*
 * main() as reconstructed from the disassembly above.
 *
 * Iterates cnt over 0 .. argc-1 and dispatches on strlen(argv[cnt]).
 * Always returns 0 (mov $0x0,%eax before leave/ret).
 */
int main( int argc, char* argv[] )
{
    int cnt = 0;
    while ( cnt < argc )
    {
        /* <+22>..<+48>: the index is cnt itself; there is no "add $0x1" on this path. */
        size_t len = strlen( argv[cnt] );
        switch ( len )
        {
        case 0:
        {
            /* <+70>..<+105>: first byte of argv[cnt], sign-extended, format at 0x8048674. */
            printf( "%c\n", argv[cnt][0] );
            break;
        }
        case 1:
        {
            /* <+112>..<+136>: the only path that indexes with cnt+1 (add $0x1,%eax). */
            puts( argv[cnt+1] );
            break;
        }
        case 2:
        {
            /* <+143>..<+158>: cnt passed as the second printf argument. */
            printf( "%d\n", cnt );
            break;
        }
        default:
        {
            /*
             * <+165>..<+180>: crash site. The integer cnt is passed where "%s"
             * expects a string pointer. Kept as-is on purpose: this mismatch is
             * exactly what the reconstruction is meant to reveal.
             */
            printf( "%s\n", cnt );
            break;
        }
        }
        cnt++;
    }
    return 0;
}
由于栈帧#2中main的地址0x080485b9(即call printf之后的返回地址)落在下面这一段中
default:
{
0x080485a5 <+165>: mov 0x1c(%esp),%eax
0x080485a9 <+169>: mov %eax,0x4(%esp)
0x080485ad <+173>: movl $0x804867c,(%esp)
0x080485b4 <+180>: call 0x80483e0 <printf@plt>
0x080485b9 <+185>: nop
break;
}
可知,是由
default:
{
printf( “%s\n”, cnt );
break;
}
导致崩溃的。
对比一下源代码,可见非常吻合。
#include <stdio.h>
#include <string.h>
int main(int argc, char* argv[] )
{
for ( int i = 0; i < argc; i++ )
{
int len = strlen( argv[i] );
switch ( len )
{
case 0:
printf( "%c\n", argv[i][0] );
break;
case 1:
printf( "%s\n", argv[i+1] );
break;
case 2:
printf( "%d\n", i );
break;
default:
printf( "%s\n", i );
break;
}
}
return 0;
}