Wednesday, May 19, 2010

memcpy part 2

Greetings everyone,


Just playing with clang/llvm (1.1/2.7) [memcpy 65 bytes string]


clang/llvm 1.1/2.7
llvmc -x c++ -O3

I   refs:      28,117,480
I1  misses:           718
L2i misses:           711
I1  miss rate:       0.00%
L2i miss rate:       0.00%
 
D   refs:      25,731,989  (13,528,145 rd   + 12,203,844 wr)
D1  misses:         6,903  (     6,512 rd   +        391 wr)
L2d misses:         4,108  (     3,840 rd   +        268 wr)
D1  miss rate:        0.0% (       0.0%     +        0.0%  )
L2d miss rate:        0.0% (       0.0%     +        0.0%  )
 
L2 refs:            7,621  (     7,230 rd   +        391 wr)
L2 misses:          4,819  (     4,551 rd   +        268 wr)
L2 miss rate:         0.0% (       0.0%     +        0.0%  )


gcc 4.5
g++ -x c++ -O3

I   refs:      14,482,419
I1  misses:           713
L2i misses:           706
I1  miss rate:       0.00%
L2i miss rate:       0.00%


D   refs:      18,389,586  (9,332,878 rd   + 9,056,708 wr)
D1  misses:         6,823  (    6,487 rd   +       336 wr)
L2d misses:         4,066  (    3,833 rd   +       233 wr)
D1  miss rate:        0.0% (      0.0%     +       0.0%  )
L2d miss rate:        0.0% (      0.0%     +       0.0%  )


L2 refs:            7,536  (    7,200 rd   +       336 wr)
L2 misses:          4,772  (    4,539 rd   +       233 wr)
L2 miss rate:         0.0% (      0.0%     +       0.0%  )


And here is the dump of the llvm-generated main:

   0x08048430 <+0>:    push   %edi
   0x08048431 <+1>:    push   %esi
   0x08048432 <+2>:    sub    $0x100c,%esp
   0x08048438 <+8>:    lea    0xc(%esp),%esi
   0x0804843c <+12>:    mov    $0x7ffff,%edi
   0x08048441 <+17>:    mov    %esi,(%esp)
   0x08048444 <+20>:    movl   $0x1000,0x8(%esp)
   0x0804844c <+28>:    movl   $0x0,0x4(%esp)
   0x08048454 <+36>:    call   0x804833c <memset@plt>
   0x08048459 <+41>:    lea    0x0(%esi,%eiz,1),%esi
   0x08048460 <+48>:    mov    %esi,(%esp)
   0x08048463 <+51>:    movl   $0x3f,0x8(%esp)
   0x0804846b <+59>:    movl   $0x8048560,0x4(%esp)
   0x08048473 <+67>:    call   0x804835c <memcpy@plt>
   0x08048478 <+72>:    dec    %edi
   0x08048479 <+73>:    jne    0x8048460
   0x0804847b <+75>:    xor    %eax,%eax
   0x0804847d <+77>:    add    $0x100c,%esp
   0x08048483 <+83>:    pop    %esi
   0x08048484 <+84>:    pop    %edi
   0x08048485 <+85>:    ret   

Frankly, I thought llvm would perform better (unless I'm doing something wrong).

Friday, May 14, 2010

Wednesday, May 12, 2010

just a couple of prime numbers

Greetings everyone,

gcc-4.5/libstdc++-v3/src/hashtable-aux.cc
 namespace __detail
{
  extern const unsigned long __prime_list[] = // 256 + 1 or 256 + 48 + 1
  {
    2ul, 3ul, 5ul, 7ul, 11ul, 13ul, 17ul, 19ul, 23ul, 29ul, 31ul,
    37ul, 41ul, 43ul, 47ul, 53ul, ....

Thursday, May 6, 2010

memcpy

Greetings everyone,

gcc 4.5

Suppose we have the following code
/*
 * Demo program: copies the string literal x into the zero-filled buffer y
 * with memcpy() and prints the result.
 *
 * The listing omits its headers: memcpy()/strlen() are declared in
 * <string.h> and printf() in <stdio.h>. argc and argv are not used.
 */
int main(int argc, char *argv[])
{
    /*
     * NOTE(review): this literal is 62 characters long, while the 64-byte
     * disassembly shown below stores constants that decode to
     * "I'll never write a useless code!" twice -- presumably the measured
     * build used a slightly different literal; confirm.
     */
    const char *x = "I'll never write useless code! I'll never write useless code! ";
    /* Whole buffer starts out zeroed. */
    char y[4096] = {0,}; /*nevermind*/

    /*
     * strlen(x) excludes the terminating NUL, so no terminator is copied;
     * y is already zero-filled, which keeps it NUL-terminated for "%s".
     */
    memcpy(y, x, strlen(x));
    printf("%s", y);
   
    return 0;
}

Now, let's "rework" it to
    const char *x = "I'll never write useless code! I'll never write useless code! I'll never write useless code! ";

What's the difference?

/*x86, cpu family:6, model:15, cache_alignment:64*/
g++ -O2

For the 64-byte string we have 16 movl instructions:
(or movl, ..., movl, movw)

Dump of assembler code for function main:
   0x08048420 <+0>:    push   %ebp
   0x08048421 <+1>:    xor    %eax,%eax
   0x08048423 <+3>:    mov    %esp,%ebp
   0x08048425 <+5>:    and    $0xfffffff0,%esp
   0x08048428 <+8>:    push   %edi
   0x08048429 <+9>:    mov    $0x400,%ecx
   0x0804842e <+14>:    sub    $0x101c,%esp
   0x08048434 <+20>:    lea    0x10(%esp),%edi
   0x08048438 <+24>:    rep stos %eax,%es:(%edi)
   0x0804843a <+26>:    lea    0x10(%esp),%eax
   0x0804843e <+30>:    movl   $0x6c6c2749,0x10(%esp)
   0x08048446 <+38>:    movl   $0x76656e20,0x14(%esp)
   0x0804844e <+46>:    movl   $0x77207265,0x18(%esp)
   0x08048456 <+54>:    movl   $0x65746972,0x1c(%esp)
   0x0804845e <+62>:    movl   $0x75206120,0x20(%esp)
   0x08048466 <+70>:    movl   $0x656c6573,0x24(%esp)
   0x0804846e <+78>:    movl   $0x63207373,0x28(%esp)
   0x08048476 <+86>:    movl   $0x2165646f,0x2c(%esp)
   0x0804847e <+94>:    movl   $0x6c6c2749,0x30(%esp)
   0x08048486 <+102>:    movl   $0x76656e20,0x34(%esp)
   0x0804848e <+110>:    movl   $0x77207265,0x38(%esp)
   0x08048496 <+118>:    movl   $0x65746972,0x3c(%esp)
   0x0804849e <+126>:    movl   $0x75206120,0x40(%esp)
   0x080484a6 <+134>:    movl   $0x656c6573,0x44(%esp)
   0x080484ae <+142>:    movl   $0x63207373,0x48(%esp)
   0x080484b6 <+150>:    movl   $0x2165646f,0x4c(%esp)
   0x080484be <+158>:    mov    %eax,0x4(%esp)
   0x080484c2 <+162>:    movl   $0x80485a0,(%esp)
   0x080484c9 <+169>:    call   0x8048348
   0x080484ce <+174>:    add    $0x101c,%esp
   0x080484d4 <+180>:    xor    %eax,%eax
   0x080484d6 <+182>:    pop    %edi
   0x080484d7 <+183>:    mov    %ebp,%esp
   0x080484d9 <+185>:    pop    %ebp
   0x080484da <+186>:    ret



And this one is for a string longer than 64 bytes:

Dump of assembler code for function main:
   0x08048420 <+0>:    push   %ebp
   0x08048421 <+1>:    xor    %eax,%eax
   0x08048423 <+3>:    mov    %esp,%ebp
   0x08048425 <+5>:    and    $0xfffffff0,%esp
   0x08048428 <+8>:    sub    $0x1020,%esp
   0x0804842e <+14>:    mov    $0x400,%ecx
   0x08048433 <+19>:    mov    %edi,0x101c(%esp)
   0x0804843a <+26>:    lea    0x10(%esp),%edi
   0x0804843e <+30>:    mov    %esi,0x1018(%esp)
   0x08048445 <+37>:    mov    $0x8048540,%esi
   0x0804844a <+42>:    rep stos %eax,%es:(%edi)
   0x0804844c <+44>:    lea    0x10(%esp),%eax
   0x08048450 <+48>:    mov    %eax,%edi
   0x08048452 <+50>:    mov    $0x20,%cl
   0x08048454 <+52>:    rep movsl %ds:(%esi),%es:(%edi)
   0x08048456 <+54>:    mov    %eax,0x4(%esp)
   0x0804845a <+58>:    movl   $0x80485c4,(%esp)
   0x08048461 <+65>:    call   0x8048348
   0x08048466 <+70>:    xor    %eax,%eax
   0x08048468 <+72>:    mov    0x1018(%esp),%esi
   0x0804846f <+79>:    mov    0x101c(%esp),%edi
   0x08048476 <+86>:    mov    %ebp,%esp
   0x08048478 <+88>:    pop    %ebp
   0x08048479 <+89>:    ret 


In numbers: on my machine, copying the 64-byte string costs min. 60 (avg. 73) cycles, and copying the 65-byte string costs min. 96 (avg. 113) cycles.

Wednesday, May 5, 2010

delete vs delete[]

Greetings everyone,

gcc 4.4/gcc 4.5

int
*p = NULL;
delete p;
0x08049fe5 <+0>: push %ebp
0x08049fe6 <+1>: mov %esp,%ebp
0x08049fe8 <+3>: and $0xfffffff0,%esp
0x08049feb <+6>: sub $0x20,%esp
0x08049fee <+9>: call 0x8048fb4 <mcount@plt>
0x08049ff3 <+14>: movl $0x0,0x1c(%esp)
0x08049ffb <+22>: mov 0x1c(%esp),%eax
0x08049fff <+26>: mov %eax,(%esp)
0x0804a002 <+29>: call 0x8048e54 <_ZdlPv@plt>
0x0804a007 <+34>: mov $0x1,%eax
0x0804a00c <+39>: leave
0x0804a00d <+40>: ret
 
int *p = NULL;
delete[] p;
0x08049fe5 <+0>: push %ebp
0x08049fe6 <+1>: mov %esp,%ebp
0x08049fe8 <+3>: and $0xfffffff0,%esp
0x08049feb <+6>: sub $0x20,%esp
0x08049fee <+9>: call 0x8048fa4 <mcount@plt>
0x08049ff3 <+14>: movl $0x0,0x1c(%esp)
0x08049ffb <+22>: cmpl $0x0,0x1c(%esp)
0x0804a000 <+27>: je 0x804a00e <main+41>
0x0804a002 <+29>: mov 0x1c(%esp),%eax
0x0804a006 <+33>: mov %eax,(%esp)
0x0804a009 <+36>: call 0x8048fc4 <_ZdaPv@plt>
0x0804a00e <+41>: mov $0x1,%eax
0x0804a013 <+46>: leave
0x0804a014 <+47>: ret
So, we are not calling ::operator delete[] in the case of a NULL pointer.
0x08049ffb <+22>: cmpl $0x0,0x1c(%esp)
0x0804a000 <+27>: je 0x804a00e <main+41>

Any comments?