|
You do not even need to subtract 'r'. int run_switches(const char* s) {
int s_count = 0;
const char *begin = s;
while(*s) {
s_count += (1 & *s++);
}
int count = s-begin;
return count - s_count;
}
which compiles to: .L49:
and edx, 1
add rax, 1
add ecx, edx
movzx edx, BYTE PTR [rax]
test dl, dl
jne .L49
edit: other variant int run_switches2(const char* s) {
const char *begin = s;
int sum = 0;
while(*s) {
sum += *s++;
}
int count = s-begin;
int s_count = sum - ('s'*count)/('p'-'s');
int p_count = count - s_count;
return p_count - s_count;
}
which compiles to: run_switches2(char const*):
movsx eax, BYTE PTR [rdi]
test al, al
je .L56
mov rdx, rdi
xor ecx, ecx
.L55:
add rdx, 1
add ecx, eax
movsx eax, BYTE PTR [rdx]
test al, al
jne .L55
sub rdx, rdi
imul esi, edx, 115
movsx rax, esi
sar esi, 31
imul rax, rax, 1431655766
shr rax, 32
sub eax, esi
add ecx, eax
sub edx, ecx
mov eax, edx
sub eax, ecx
ret
.L56:
xor eax, eax
ret
None of these will beat the clever blocked SIMD someone showed elsethread. |