For __builtin_popcountll in GCC, all you need to do is add the -mpopcnt compiler flag.
#include <stdlib.h>
/*
 * Minimal demo: parse the first command-line argument as an integer and
 * return its population count (number of set bits) as the exit status.
 *
 * NOTE(review): argv[1] is read without checking argc, so running the
 * program with no arguments hands a null pointer to atoi.
 */
int main(int argc, char **argv) {
/* atoi yields an int; it is widened to 64 bits for __builtin_popcountll
   (sign-extended -- see the cltq in the listings below), so a negative
   input also counts the 32 replicated sign bits. */
return __builtin_popcountll(atoi(argv[1]));
}
with -mpopcnt
$ otool -tvV a.out
a.out:
(__TEXT,__text) section
_main:
0000000100000f66 pushq %rbp
0000000100000f67 movq %rsp, %rbp
0000000100000f6a subq $0x10, %rsp
0000000100000f6e movq %rdi, -0x8(%rbp)
0000000100000f72 movq -0x8(%rbp), %rax
0000000100000f76 addq $0x8, %rax
0000000100000f7a movq (%rax), %rax
0000000100000f7d movq %rax, %rdi
0000000100000f80 callq 0x100000f8e ## symbol stub for: _atoi
0000000100000f85 cltq
0000000100000f87 popcntq %rax, %rax
0000000100000f8c leave
0000000100000f8d retq
without -mpopcnt
a.out:
(__TEXT,__text) section
_main:
0000000100000f55 pushq %rbp
0000000100000f56 movq %rsp, %rbp
0000000100000f59 subq $0x10, %rsp
0000000100000f5d movq %rdi, -0x8(%rbp)
0000000100000f61 movq -0x8(%rbp), %rax
0000000100000f65 addq $0x8, %rax
0000000100000f69 movq (%rax), %rax
0000000100000f6c movq %rax, %rdi
0000000100000f6f callq 0x100000f86 ## symbol stub for: _atoi
0000000100000f74 cltq
0000000100000f76 movq %rax, %rdi
0000000100000f79 callq 0x100000f80 ## symbol stub for: ___popcountdi2
0000000100000f7e leave
0000000100000f7f retq
Notes
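Be sure to check the POPCNT feature bit (CPUID leaf 1, ECX bit 23) at run time before using POPCNTQ; note that this is the POPCNT flag, while the ABM/LZCNT flag is a separate bit (CPUID leaf 0x80000001, ECX bit 5).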
GCC enables -mpopcnt as part of -msse4.2, even though they have separate CPUID feature bits. godbolt.org/g/SfcHYh. Also, if you use __builtin_popcountll(argc), your program won't optimize to return 32 when you enable optimization. Or just look at asm for a function with an int arg, since you just want to look at asm, not run it. However, -march=native is by far the best choice if you're going to run your binary locally, since it sets -mtune as well as enabling instructions. – Incompetent