Wednesday, May 19, 2010

memcpy part 2

Greetings everyone,


Just playing with clang/llvm (1.1/2.7) [memcpy of a 65-byte string]


clang/llvm 1.1/2.7
llvmc -x c++ -O3

I   refs:      28,117,480
I1  misses:           718
L2i misses:           711
I1  miss rate:       0.00%
L2i miss rate:       0.00%
 
D   refs:      25,731,989  (13,528,145 rd   + 12,203,844 wr)
D1  misses:         6,903  (     6,512 rd   +        391 wr)
L2d misses:         4,108  (     3,840 rd   +        268 wr)
D1  miss rate:        0.0% (       0.0%     +        0.0%  )
L2d miss rate:        0.0% (       0.0%     +        0.0%  )
 
L2 refs:            7,621  (     7,230 rd   +        391 wr)
L2 misses:          4,819  (     4,551 rd   +        268 wr)
L2 miss rate:         0.0% (       0.0%     +        0.0%  )


gcc 4.5
g++ -x c++ -O3

I   refs:      14,482,419
I1  misses:           713
L2i misses:           706
I1  miss rate:       0.00%
L2i miss rate:       0.00%


D   refs:      18,389,586  (9,332,878 rd   + 9,056,708 wr)
D1  misses:         6,823  (    6,487 rd   +       336 wr)
L2d misses:         4,066  (    3,833 rd   +       233 wr)
D1  miss rate:        0.0% (      0.0%     +       0.0%  )
L2d miss rate:        0.0% (      0.0%     +       0.0%  )


L2 refs:            7,536  (    7,200 rd   +       336 wr)
L2 misses:          4,772  (    4,539 rd   +       233 wr)
L2 miss rate:         0.0% (      0.0%     +       0.0%  )


And here is the dump of the llvm-generated main:

   0x08048430 <+0>:    push   %edi
   0x08048431 <+1>:    push   %esi
   0x08048432 <+2>:    sub    $0x100c,%esp
   0x08048438 <+8>:    lea    0xc(%esp),%esi
   0x0804843c <+12>:    mov    $0x7ffff,%edi
   0x08048441 <+17>:    mov    %esi,(%esp)
   0x08048444 <+20>:    movl   $0x1000,0x8(%esp)
   0x0804844c <+28>:    movl   $0x0,0x4(%esp)
   0x08048454 <+36>:    call   0x804833c <memset@plt>
   0x08048459 <+41>:    lea    0x0(%esi,%eiz,1),%esi
   0x08048460 <+48>:    mov    %esi,(%esp)
   0x08048463 <+51>:    movl   $0x3f,0x8(%esp)
   0x0804846b <+59>:    movl   $0x8048560,0x4(%esp)
   0x08048473 <+67>:    call   0x804835c <memcpy@plt>
   0x08048478 <+72>:    dec    %edi
   0x08048479 <+73>:    jne    0x8048460
   0x0804847b <+75>:    xor    %eax,%eax
   0x0804847d <+77>:    add    $0x100c,%esp
   0x08048483 <+83>:    pop    %esi
   0x08048484 <+84>:    pop    %edi
   0x08048485 <+85>:    ret   

Frankly, I thought llvm would perform better (unless I'm doing something wrong).

Friday, May 14, 2010

printf

Greetings everyone,

Interesting reading
where the printf rubber meets the road

Wednesday, May 12, 2010

just a couple of prime numbers

Greetings everyone,

gcc-4.5/libstdc++-v3/src/hashtable-aux.cc
 namespace __detail
{
  extern const unsigned long __prime_list[] = // 256 + 1 or 256 + 48 + 1
  {
    2ul, 3ul, 5ul, 7ul, 11ul, 13ul, 17ul, 19ul, 23ul, 29ul, 31ul,
    37ul, 41ul, 43ul, 47ul, 53ul, ....

Thursday, May 6, 2010

memcpy

Greetings everyone,

gcc 4.5

Suppose we have the following code:
/*
 * Demo program: copies a string literal into a zero-initialized local
 * buffer with memcpy and prints the buffer. argc and argv are unused.
 * Always returns 0.
 */
int main(int argc, char *argv[])
{
    const char *x = "I'll never write useless code! I'll never write useless code! ";
    char y[4096] = {0,}; /*nevermind*/

    /* strlen(x) excludes the terminating NUL, so only the characters are
       copied; y was zero-initialized above, so it is still NUL-terminated
       when printed with "%s". */
    memcpy(y, x, strlen(x));
    printf("%s", y);
   
    return 0;
}

Now, let's "rework" it to
    const char *x = "I'll never write useless code! I'll never write useless code! I'll never write useless code! ";

What's the difference?

/*x86, cpu family:6, model:15, cache_alignment:64*/
g++ -O2

For the 64-byte string we have 16 movl instructions:
(or movl, ..., movl, movw)

Dump of assembler code for function main:
   0x08048420 <+0>:    push   %ebp
   0x08048421 <+1>:    xor    %eax,%eax
   0x08048423 <+3>:    mov    %esp,%ebp
   0x08048425 <+5>:    and    $0xfffffff0,%esp
   0x08048428 <+8>:    push   %edi
   0x08048429 <+9>:    mov    $0x400,%ecx
   0x0804842e <+14>:    sub    $0x101c,%esp
   0x08048434 <+20>:    lea    0x10(%esp),%edi
   0x08048438 <+24>:    rep stos %eax,%es:(%edi)
   0x0804843a <+26>:    lea    0x10(%esp),%eax
   0x0804843e <+30>:    movl   $0x6c6c2749,0x10(%esp)
   0x08048446 <+38>:    movl   $0x76656e20,0x14(%esp)
   0x0804844e <+46>:    movl   $0x77207265,0x18(%esp)
   0x08048456 <+54>:    movl   $0x65746972,0x1c(%esp)
   0x0804845e <+62>:    movl   $0x75206120,0x20(%esp)
   0x08048466 <+70>:    movl   $0x656c6573,0x24(%esp)
   0x0804846e <+78>:    movl   $0x63207373,0x28(%esp)
   0x08048476 <+86>:    movl   $0x2165646f,0x2c(%esp)
   0x0804847e <+94>:    movl   $0x6c6c2749,0x30(%esp)
   0x08048486 <+102>:    movl   $0x76656e20,0x34(%esp)
   0x0804848e <+110>:    movl   $0x77207265,0x38(%esp)
   0x08048496 <+118>:    movl   $0x65746972,0x3c(%esp)
   0x0804849e <+126>:    movl   $0x75206120,0x40(%esp)
   0x080484a6 <+134>:    movl   $0x656c6573,0x44(%esp)
   0x080484ae <+142>:    movl   $0x63207373,0x48(%esp)
   0x080484b6 <+150>:    movl   $0x2165646f,0x4c(%esp)
   0x080484be <+158>:    mov    %eax,0x4(%esp)
   0x080484c2 <+162>:    movl   $0x80485a0,(%esp)
   0x080484c9 <+169>:    call   0x8048348
   0x080484ce <+174>:    add    $0x101c,%esp
   0x080484d4 <+180>:    xor    %eax,%eax
   0x080484d6 <+182>:    pop    %edi
   0x080484d7 <+183>:    mov    %ebp,%esp
   0x080484d9 <+185>:    pop    %ebp
   0x080484da <+186>:    ret



And this one is for the >64-byte string:

Dump of assembler code for function main:
   0x08048420 <+0>:    push   %ebp
   0x08048421 <+1>:    xor    %eax,%eax
   0x08048423 <+3>:    mov    %esp,%ebp
   0x08048425 <+5>:    and    $0xfffffff0,%esp
   0x08048428 <+8>:    sub    $0x1020,%esp
   0x0804842e <+14>:    mov    $0x400,%ecx
   0x08048433 <+19>:    mov    %edi,0x101c(%esp)
   0x0804843a <+26>:    lea    0x10(%esp),%edi
   0x0804843e <+30>:    mov    %esi,0x1018(%esp)
   0x08048445 <+37>:    mov    $0x8048540,%esi
   0x0804844a <+42>:    rep stos %eax,%es:(%edi)
   0x0804844c <+44>:    lea    0x10(%esp),%eax
   0x08048450 <+48>:    mov    %eax,%edi
   0x08048452 <+50>:    mov    $0x20,%cl
   0x08048454 <+52>:    rep movsl %ds:(%esi),%es:(%edi)
   0x08048456 <+54>:    mov    %eax,0x4(%esp)
   0x0804845a <+58>:    movl   $0x80485c4,(%esp)
   0x08048461 <+65>:    call   0x8048348
   0x08048466 <+70>:    xor    %eax,%eax
   0x08048468 <+72>:    mov    0x1018(%esp),%esi
   0x0804846f <+79>:    mov    0x101c(%esp),%edi
   0x08048476 <+86>:    mov    %ebp,%esp
   0x08048478 <+88>:    pop    %ebp
   0x08048479 <+89>:    ret 


In numbers: on my machine, copying a 64-byte string costs min 60 (avg. 73) cycles, while copying a 65-byte string costs min 96 (avg. 113) cycles.

Wednesday, May 5, 2010

delete vs delete[]

Greetings everyone,

gcc 4.4/gcc 4.5

int
*p = NULL;
delete p;
0x08049fe5 <+0>: push %ebp
0x08049fe6 <+1>: mov %esp,%ebp
0x08049fe8 <+3>: and $0xfffffff0,%esp
0x08049feb <+6>: sub $0x20,%esp
0x08049fee <+9>: call 0x8048fb4 <mcount@plt>
0x08049ff3 <+14>: movl $0x0,0x1c(%esp)
0x08049ffb <+22>: mov 0x1c(%esp),%eax
0x08049fff <+26>: mov %eax,(%esp)
0x0804a002 <+29>: call 0x8048e54 <_ZdlPv@plt>
0x0804a007 <+34>: mov $0x1,%eax
0x0804a00c <+39>: leave
0x0804a00d <+40>: ret
 
int *p = NULL;
delete[] p;
0x08049fe5 <+0>: push %ebp
0x08049fe6 <+1>: mov %esp,%ebp
0x08049fe8 <+3>: and $0xfffffff0,%esp
0x08049feb <+6>: sub $0x20,%esp
0x08049fee <+9>: call 0x8048fa4 <mcount@plt>
0x08049ff3 <+14>: movl $0x0,0x1c(%esp)
0x08049ffb <+22>: cmpl $0x0,0x1c(%esp)
0x0804a000 <+27>: je 0x804a00e <main+41>
0x0804a002 <+29>: mov 0x1c(%esp),%eax
0x0804a006 <+33>: mov %eax,(%esp)
0x0804a009 <+36>: call 0x8048fc4 <_ZdaPv@plt>
0x0804a00e <+41>: mov $0x1,%eax
0x0804a013 <+46>: leave
0x0804a014 <+47>: ret
So, in the delete[] case, ::operator delete[] is not called when the pointer is NULL:
0x08049ffb <+22>: cmpl $0x0,0x1c(%esp)
0x0804a000 <+27>: je 0x804a00e <main+41>

Any comments?