Hi,
Compiler: gcc-4.5.1.
Suppose we have some very simple and dumb C++ code:
// C++ example (built with g++): two function-local statics whose initializers
// depend on the argument, so they have to be initialized at run time on the
// first call. The compiler tracks this with one guard variable per static
// (the _ZGVZ10save_stateiE... symbols in the disassembly listings).
void save_state(int i) {
// Initialized once, to the first call's argument plus 9.
static int _foo_i = i + 0x09;
// Initialized once, to the first call's argument.
static int _foo_j = i;
}
g++ -O2 will give us:
4005c0 <+0>: cmpb $0x0,0x200491(%rip) # 0x600a58 <_ZGVZ10save_stateiE6_foo_i>
4005c7 <+7>: push %rbx
4005c8 <+8>: mov %edi,%ebx
4005ca <+10>: je 0x400600 <_Z10save_statei+64>
4005cc <+12>: cmpb $0x0,0x20048d(%rip) # 0x600a60 <_ZGVZ10save_stateiE6_foo_j>
4005d3 <+19>: je 0x4005e0 <_Z10save_statei+32>
4005d5 <+21>: pop %rbx
4005d6 <+22>: retq
4005d7 <+23>: nopw 0x0(%rax,%rax,1)
4005e0 <+32>: mov $0x600a60,%edi
4005e5 <+37>: callq 0x4004a0 <__cxa_guard_acquire@plt>
4005ea <+42>: test %eax,%eax
4005ec <+44>: je 0x4005d5 <_Z10save_statei+21>
4005ee <+46>: mov %ebx,0x200474(%rip) # 0x600a68 <_ZZ10save_stateiE6_foo_j>
4005f4 <+52>: mov $0x600a60,%edi
4005f9 <+57>: pop %rbx
4005fa <+58>: jmpq 0x4004c0 <__cxa_guard_release@plt>
4005ff <+63>: nop
400600 <+64>: mov $0x600a58,%edi
400605 <+69>: callq 0x4004a0 <__cxa_guard_acquire@plt>
40060a <+74>: test %eax,%eax
40060c <+76>: je 0x4005cc <_Z10save_statei+12>
40060e <+78>: lea 0x9(%rbx),%eax
400611 <+81>: mov $0x600a58,%edi
400616 <+86>: mov %eax,0x200450(%rip) # 0x600a6c <_ZZ10save_stateiE6_foo_i>
40061c <+92>: callq 0x4004c0 <__cxa_guard_release@plt>
400621 <+97>: jmp 0x4005cc <_Z10save_statei+12>
First of all, we check the global guard variable
_ZGVZ10save_stateiE6_foo_i to see whether the function-local static
_ZZ10save_stateiE6_foo_i has already been initialized (the guard is also what
protects the initialization from running more than once).
After all those crazy dynamic-linker symbol lookups
(do_lookup_x,
_dl_name_match_p, check_match.10800,
_dl_lookup_symbol_x, etc.)
we arrive at
0x00007ffff7b913a3 <+179>: movb $0x1,0x1(%rdi)
in
__cxa_guard_acquire, which sets our global
_ZGVZ10save_stateiE6_foo_i to:
0x600a58 <_ZGVZ10save_stateiE6_foo_i>: 0x00000100
and
0x00007ffff7b91459 <+57>: movb $0x0,0x1(%rdi)
0x00007ffff7b9145d <+61>: movb $0x1,(%rdi)
in
__cxa_guard_release which sets
_ZGVZ10save_stateiE6_foo_i to:
0x600a58 <_ZGVZ10save_stateiE6_foo_i>: 0x00000001
g++ -Os will give us:
4005b4 <+0>: cmpb $0x0,0x20046d(%rip) # 0x600a28 <_ZGVZ10save_stateiE6_foo_i>
4005bb <+7>: push %rbx
4005bc <+8>: mov %edi,%ebx
4005be <+10>: jne 0x4005e1 <_Z10save_statei+45>
4005c0 <+12>: mov $0x600a28,%edi
4005c5 <+17>: callq 0x4004a0 <__cxa_guard_acquire@plt>
4005ca <+22>: test %eax,%eax
4005cc <+24>: je 0x4005e1 <_Z10save_statei+45>
4005ce <+26>: lea 0x9(%rbx),%eax
4005d1 <+29>: mov $0x600a28,%edi
4005d6 <+34>: mov %eax,0x200460(%rip) # 0x600a3c <_ZZ10save_stateiE6_foo_i>
4005dc <+40>: callq 0x4004c0 <__cxa_guard_release@plt>
4005e1 <+45>: cmpb $0x0,0x200448(%rip) # 0x600a30 <_ZGVZ10save_stateiE6_foo_j>
4005e8 <+52>: jne 0x400609 <_Z10save_statei+85>
4005ea <+54>: mov $0x600a30,%edi
4005ef <+59>: callq 0x4004a0 <__cxa_guard_acquire@plt>
4005f4 <+64>: test %eax,%eax
4005f6 <+66>: je 0x400609 <_Z10save_statei+85>
4005f8 <+68>: mov %ebx,0x20043a(%rip) # 0x600a38 <_ZZ10save_stateiE6_foo_j>
4005fe <+74>: mov $0x600a30,%edi
400603 <+79>: pop %rbx
400604 <+80>: jmpq 0x4004c0 <__cxa_guard_release@plt>
400609 <+85>: pop %rbx
40060a <+86>: retq
By the way, g++ is trying to help us here: the calls to
__cxa_guard_acquire/
__cxa_guard_release
implement thread-safe initialization of static local variables.
If you don't need it, don't pay for it.
Both
g++ -O2 -fno-threadsafe-statics
and
g++ -Os -fno-threadsafe-statics
generate identical code:
4004d4 <+0>: cmpb $0x0,0x20042d(%rip) # 0x600908 <_ZGVZ10save_stateiE6_foo_i>
4004db <+7>: jne 0x4004ed <_Z10save_statei+25>
4004dd <+9>: lea 0x9(%rdi),%eax
4004e0 <+12>: movb $0x1,0x200421(%rip) # 0x600908 <_ZGVZ10save_stateiE6_foo_i>
4004e7 <+19>: mov %eax,0x20042f(%rip) # 0x60091c <_ZZ10save_stateiE6_foo_i>
4004ed <+25>: cmpb $0x0,0x20041c(%rip) # 0x600910 <_ZGVZ10save_stateiE6_foo_j>
4004f4 <+32>: jne 0x400503 <_Z10save_statei+47>
4004f6 <+34>: mov %edi,0x20041c(%rip) # 0x600918 <_ZZ10save_stateiE6_foo_j>
4004fc <+40>: movb $0x1,0x20040d(%rip) # 0x600910 <_ZGVZ10save_stateiE6_foo_j>
400503 <+47>: retq
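The interesting part here is
movb $0x1, _ZGVZ10save_stateiE6_foo_i
- the guard is now set with a single plain store instead of the calls to
__cxa_guard_acquire/__cxa_guard_release.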
Keep it simple.