|
> 126 msec for 100k cycles. Or, to put it another way: roughly 1,260 nanoseconds per iteration. That's about 3,000-5,000 instructions on a modern CPU. Believe it or not, that's actually pretty bad. After jumping through some hoops to ensure that rustc doesn't just compile the whole thing down to a constant, I benchmarked my version as taking 15-20 nanoseconds per iteration. About 45-80 instructions! I actually couldn't quite believe it myself, so I jumped through more hoops to ensure that it wasn't being optimised away, wasn't getting inlined too aggressively, etc... No change. I ran it through Godbolt to inspect the assembly, and then I realised that, yes, modern languages, compilers, and CPUs really are this good! Think about it: for the specific input example with 4 strings, the algorithm boils down to: compare 7 bytes with 7 bytes, replace a pointer with another pointer, then compare 2 bytes with 2 bytes three times. That's about a hundred assembly instructions, or thereabouts. Godbolt output:
push rbp
# Disassembly of example::testabc (objdump-style, Intel operand order),
# continued: the function's first instruction (`push rbp`) immediately
# precedes this point.
#
# Shape of the code: three near-identical passes. Each pass compares two
# consecutive 16-byte entries of the caller's array (base in r12) against the
# two entries of a static table (base in r14) and, if both match, overwrites
# one entry of the caller's array.
#
# Entry layout as used below: 8-byte pointer at +0, 8-byte length at +8
# (the +8 word is passed to bcmp as its byte count; presumably these are
# Rust &str values -- TODO confirm against the Rust source).
#
# NOTE(review): the listing omits per-line addresses, so every "target is ..."
# remark below is inferred from the <example::testabc+0xNN> offsets; confirm
# against a listing that shows addresses.
#
# Register roles:
#   r12 = caller's array base (copied from rdi)
#   r14 = static table base
#   r15 = address of bcmp
#   rbx = running byte offset, stepped by 0x10 per entry
#   rbp = number of entries matched so far in the current pass
#   r13 = copy of rbp taken before it is incremented
#
# Prologue: save the remaining callee-saved registers used below.
push r15
push r14
push r13
push r12
push rbx
# This slot is never popped back into rax (the epilogue discards it with
# `add rsp,0x8`); presumably it only keeps rsp 16-byte aligned at the calls.
push rax
# r12 = incoming rdi, i.e. the first integer argument if this is the
# System V AMD64 ABI (assumed; consistent with the glibc bcmp symbol below).
mov r12,rdi
# Pass 1 setup: rbx = 8 so [base+rbx] addresses the length word of the first
# entry and [base+rbx-8] its pointer word; rbp = 0 entries matched.
mov ebx,0x8
xor ebp,ebp
lea r14,[rip+0x3cb3c] # 444e8 <anon.6527da4acb4810bb73692fa85a2e25ef.0.llvm.5506334730328533235+0x20>
# Load the bcmp address once; all three passes call it through r15.
mov r15,QWORD PTR [rip+0x3f3b5] # 46d68 <bcmp@GLIBC_2.2.5>
# Multi-byte nops: padding only (presumably aligning the loop head).
cs nop WORD PTR [rax+rax*1+0x0]
nop DWORD PTR [rax]
# ---- Pass 1: caller entries at r12+0x00 and r12+0x10 vs. the table ----
# Loop head (the +0x30 branch below presumably lands here).
# Two entries matched -> leave the loop for the stores.
cmp rbp,0x2
je 79f2 <example::testabc+0x62>
mov r13,rbp
# Compare lengths first; bcmp is only called when they are equal.
mov rdx,QWORD PTR [rbx+r14*1]
cmp rdx,QWORD PTR [r12+rbx*1]
jne 79ec <example::testabc+0x5c>
# lea does not touch flags; rbp = r13 + 1.
lea rbp,[r13+0x1]
# bcmp(rdi = table entry pointer, rsi = caller entry pointer, rdx = length).
mov rsi,QWORD PTR [r12+rbx*1-0x8]
mov rdi,QWORD PTR [rbx+r14*1-0x8]
call r15
# Advance to the next 16-byte entry; the flags this sets are overwritten by
# the test that follows.
add rbx,0x10
# bcmp returned 0 (bytes equal) -> next iteration.
test eax,eax
je 79c0 <example::testabc+0x30>
# Mismatch path. r13 was copied from rbp only while rbp != 2, so it is 0 or 1
# here and the jb is expected to be taken, skipping the two stores
# (target is presumably the `xor ebx,ebx` below).
cmp r13,0x2
jb 7a07 <example::testabc+0x77>
# Both entries matched: overwrite the caller entry at +0x10 with
# pointer = 0x38007 and length = 1.
lea rax,[rip+0x3060e] # 38007 <_fini+0xd13>
mov QWORD PTR [r12+0x10],rax
mov QWORD PTR [r12+0x18],0x1
# ---- Pass 2: caller entries at r12+0x10 and r12+0x20 vs. the table ----
# Here rbx starts at 0 and the +0x8 / +0x10 / +0x18 displacements select the
# length and pointer words instead.
xor ebx,ebx
xor ebp,ebp
nop DWORD PTR [rax+rax*1+0x0]
# Loop head (the +0x80 branch below presumably lands here).
cmp rbp,0x2
je 7a43 <example::testabc+0xb3>
mov r13,rbp
# Length compare: table length at [r14+rbx+8], caller length at [r12+rbx+0x18].
mov rdx,QWORD PTR [rbx+r14*1+0x8]
cmp rdx,QWORD PTR [r12+rbx*1+0x18]
jne 7a3d <example::testabc+0xad>
lea rbp,[r13+0x1]
# bcmp(table entry pointer, caller entry pointer, length).
mov rsi,QWORD PTR [r12+rbx*1+0x10]
mov rdi,QWORD PTR [rbx+r14*1]
call r15
add rbx,0x10
test eax,eax
je 7a10 <example::testabc+0x80>
# Mismatch path, same reasoning as in pass 1: r13 is 0 or 1, so the stores
# are expected to be skipped.
cmp r13,0x2
jb 7a58 <example::testabc+0xc8>
# Both entries matched: caller entry at +0x20 := (0x38007, length 1).
lea rax,[rip+0x305bd] # 38007 <_fini+0xd13>
mov QWORD PTR [r12+0x20],rax
mov QWORD PTR [r12+0x28],0x1
# ---- Pass 3: caller entries at r12+0x20 and r12+0x30 vs. the table ----
# Same addressing scheme as pass 1 (rbx = 8), with the caller-side
# displacements raised by 0x20.
mov ebx,0x8
xor ebp,ebp
nop
# Loop head (the +0xd0 branch below presumably lands here).
cmp rbp,0x2
je 7a93 <example::testabc+0x103>
mov r13,rbp
# Length compare: table length at [r14+rbx], caller length at [r12+rbx+0x20].
mov rdx,QWORD PTR [rbx+r14*1]
cmp rdx,QWORD PTR [r12+rbx*1+0x20]
jne 7a8d <example::testabc+0xfd>
lea rbp,[r13+0x1]
# bcmp(table entry pointer, caller entry pointer, length).
mov rsi,QWORD PTR [r12+rbx*1+0x18]
mov rdi,QWORD PTR [rbx+r14*1-0x8]
call r15
add rbx,0x10
test eax,eax
je 7a60 <example::testabc+0xd0>
# Mismatch path: skip the stores and go to the epilogue
# (target is presumably the `add rsp,0x8` below).
cmp r13,0x2
jb 7aa8 <example::testabc+0x118>
# Both entries matched: caller entry at +0x30 := (0x38007, length 1).
lea rax,[rip+0x3056d] # 38007 <_fini+0xd13>
mov QWORD PTR [r12+0x30],rax
mov QWORD PTR [r12+0x38],0x1
# Epilogue: discard the slot created by `push rax`, then restore the saved
# registers in reverse push order.
add rsp,0x8
pop rbx
pop r12
pop r13
pop r14
pop r15
pop rbp
ret
# Padding after the ret; none of the branches above target it (presumably
# alignment for whatever follows).
nop WORD PTR [rax+rax*1+0x0]
Rust? It simply impresses. |
Your Rust code has a bug in it, is longer, and you spent more time on it.