gcc 4.5
Suppose we have the following code:
/*
 * Demo program: copy a string literal into a zero-filled stack buffer
 * and print the buffer.
 *
 * The snippet does not show its #include lines; printf() needs <stdio.h>
 * and memcpy()/strlen() need <string.h>. argc and argv are unused.
 *
 * Returns 0.
 */
int main(int argc, char *argv[])
{
/* 64-character literal: the 32-character sentence (with trailing space) twice. */
const char *x = "I'll never write useless code! I'll never write useless code! ";
char y[4096] = {0,}; /* zero-filled, so y stays NUL-terminated after the copy below */
memcpy(y, x, strlen(x)); /* copies the characters only, not the terminating NUL */
printf("%s", y);
return 0;
}
Now, let's "rework" it by making the string longer:
const char *x = "I'll never write useless code! I'll never write useless code! I'll never write useless code! ";
What's the difference?
/*x86, cpu family:6, model:15, cache_alignment:64*/
g++ -O2
For the 64-byte string we get 16 movl instructions
(or movl, ..., movl, movw when the length is not a multiple of 4):
Dump of assembler code for function main:
0x08048420 <+0>: push %ebp
0x08048421 <+1>: xor %eax,%eax
0x08048423 <+3>: mov %esp,%ebp
0x08048425 <+5>: and $0xfffffff0,%esp
0x08048428 <+8>: push %edi
0x08048429 <+9>: mov $0x400,%ecx
0x0804842e <+14>: sub $0x101c,%esp
0x08048434 <+20>: lea 0x10(%esp),%edi
0x08048438 <+24>: rep stos %eax,%es:(%edi)
0x0804843a <+26>: lea 0x10(%esp),%eax
0x0804843e <+30>: movl $0x6c6c2749,0x10(%esp)
0x08048446 <+38>: movl $0x76656e20,0x14(%esp)
0x0804844e <+46>: movl $0x77207265,0x18(%esp)
0x08048456 <+54>: movl $0x65746972,0x1c(%esp)
0x0804845e <+62>: movl $0x75206120,0x20(%esp)
0x08048466 <+70>: movl $0x656c6573,0x24(%esp)
0x0804846e <+78>: movl $0x63207373,0x28(%esp)
0x08048476 <+86>: movl $0x2165646f,0x2c(%esp)
0x0804847e <+94>: movl $0x6c6c2749,0x30(%esp)
0x08048486 <+102>: movl $0x76656e20,0x34(%esp)
0x0804848e <+110>: movl $0x77207265,0x38(%esp)
0x08048496 <+118>: movl $0x65746972,0x3c(%esp)
0x0804849e <+126>: movl $0x75206120,0x40(%esp)
0x080484a6 <+134>: movl $0x656c6573,0x44(%esp)
0x080484ae <+142>: movl $0x63207373,0x48(%esp)
0x080484b6 <+150>: movl $0x2165646f,0x4c(%esp)
0x080484be <+158>: mov %eax,0x4(%esp)
0x080484c2 <+162>: movl $0x80485a0,(%esp)
0x080484c9 <+169>: call 0x8048348
0x080484ce <+174>: add $0x101c,%esp
0x080484d4 <+180>: xor %eax,%eax
0x080484d6 <+182>: pop %edi
0x080484d7 <+183>: mov %ebp,%esp
0x080484d9 <+185>: pop %ebp
0x080484da <+186>: ret
And this one is for a string longer than 64 bytes:
Dump of assembler code for function main:
0x08048420 <+0>: push %ebp
0x08048421 <+1>: xor %eax,%eax
0x08048423 <+3>: mov %esp,%ebp
0x08048425 <+5>: and $0xfffffff0,%esp
0x08048428 <+8>: sub $0x1020,%esp
0x0804842e <+14>: mov $0x400,%ecx
0x08048433 <+19>: mov %edi,0x101c(%esp)
0x0804843a <+26>: lea 0x10(%esp),%edi
0x0804843e <+30>: mov %esi,0x1018(%esp)
0x08048445 <+37>: mov $0x8048540,%esi
0x0804844a <+42>: rep stos %eax,%es:(%edi)
0x0804844c <+44>: lea 0x10(%esp),%eax
0x08048450 <+48>: mov %eax,%edi
0x08048452 <+50>: mov $0x20,%cl
0x08048454 <+52>: rep movsl %ds:(%esi),%es:(%edi)
0x08048456 <+54>: mov %eax,0x4(%esp)
0x0804845a <+58>: movl $0x80485c4,(%esp)
0x08048461 <+65>: call 0x8048348
0x08048466 <+70>: xor %eax,%eax
0x08048468 <+72>: mov 0x1018(%esp),%esi
0x0804846f <+79>: mov 0x101c(%esp),%edi
0x08048476 <+86>: mov %ebp,%esp
0x08048478 <+88>: pop %ebp
0x08048479 <+89>: ret
In numbers: on my machine, copying a 64-byte string costs at least 60 cycles (73 on average), while copying a 65-byte string costs at least 96 cycles (113 on average).
No comments:
Post a Comment