|
"That's a huge improvement in generated code size. The above two compiles used the same gcc flags"

It would have been awfully nice to state the version of the compiler and the flags being used. With gcc 10 on Linux/AMD64, libc 4.15.0, I get the following with '-Os':
--8<--
# size_t strlcpy(char *dst, const char *src, size_t size)
# Syntax: AT&T / GAS.  ABI: System V AMD64.
# In:    %rdi = dst, %rsi = src, %rdx = size
# Out:   %rax = strlen(src) (the value returned by the strlen call)
# Saved: %rbp (holds dst) and %rbx (holds size) are pushed/popped here.
# Stack: 2 pushes + 24 bytes keeps %rsp 16-byte aligned at the call.
strlcpy:
.LFB5:
	.cfi_startproc
	pushq	%rbp
	.cfi_def_cfa_offset 16
	.cfi_offset 6, -16
	movq	%rdi, %rbp		# rbp = dst (must survive the call)
	movq	%rsi, %rdi		# first argument of strlen = src
	pushq	%rbx
	.cfi_def_cfa_offset 24
	.cfi_offset 3, -24
	movq	%rdx, %rbx		# rbx = size (must survive the call)
	subq	$24, %rsp
	.cfi_def_cfa_offset 48
	movq	%rsi, 8(%rsp)		# spill src; %rsi is caller-saved
	call	strlen
	testq	%rbx, %rbx		# size == 0 ?
	movq	8(%rsp), %rsi		# reload src (mov leaves the flags alone)
	je	.L1			# size == 0: write nothing, return length
	leaq	-1(%rbx), %rdx		# rdx = size - 1 (room left for the NUL)
	movq	%rbp, %rdi		# copy destination = dst
	cmpq	%rax, %rdx
	cmova	%rax, %rdx		# rdx = min(size - 1, strlen(src)), unsigned
	movq	%rdx, %rcx		# byte count for rep movsb
	rep movsb			# copy rcx bytes from (%rsi) to (%rdi)
	movb	$0, 0(%rbp,%rdx)	# dst[rdx] = '\0'
.L1:
	addq	$24, %rsp
	.cfi_def_cfa_offset 24
	popq	%rbx
	.cfi_def_cfa_offset 16
	popq	%rbp
	.cfi_def_cfa_offset 8
	ret
	.cfi_endproc
-->8--
Which doesn't seem so bad. |