Hacker News new | ask | show | jobs
by aaronmdjones 3421 days ago
> * Inefficient instructions are replaced with more efficient instructions. For example, for a simple x % 19 gcc will generate no fewer than 16 instructions instead of a single div/idiv. This is probably still faster, but it may be detrimental if it's not in a hot path. It should be noted that gcc emits this even at -O0.

Does it emit it at -Os ?

1 comment

Curiously not.

-O0:

    # main as emitted at -O0 (AT&T syntax: op src, dst).
    # Passes x % 19 to printf, where x is the first integer argument (%edi).
    # The remainder is computed without a divide instruction:
    #   q = x / 19 via multiply by ceil(2^35 / 19) plus shifts, then r = x - 19*q.
    main:
    .LFB0:
            .cfi_startproc
            # Frame-pointer prologue; reserve 16 bytes of stack for locals.
            pushq   %rbp
            .cfi_def_cfa_offset 16
            .cfi_offset 6, -16
            movq    %rsp, %rbp
            .cfi_def_cfa_register 6
            subq    $16, %rsp
            # Spill both incoming arguments to the frame, then reload x into %ecx.
            movl    %edi, -4(%rbp)
            movq    %rsi, -16(%rbp)
            movl    -4(%rbp), %ecx
            # Magic multiplier: 1808407283 = ceil(2^35 / 19)
            # (19 * 1808407283 = 2^35 + 9).
            movl    $1808407283, %edx
            movl    %ecx, %eax
            # One-operand imul: %edx:%eax = %eax * %edx (signed 64-bit product).
            imull   %edx
            # %edx holds the high 32 bits; 3 more bits of shift give (x * magic) >> 35.
            sarl    $3, %edx
            # %eax = x >> 31 (arithmetic): -1 when x is negative, 0 otherwise.
            movl    %ecx, %eax
            sarl    $31, %eax
            # Subtracting that adds 1 for negative x, so q truncates toward zero.
            subl    %eax, %edx
            # Build 19*q in %eax from q in %edx: 8q, 9q, 18q, 19q.
            movl    %edx, %eax
            sall    $3, %eax
            addl    %edx, %eax
            addl    %eax, %eax
            addl    %edx, %eax
            # %ecx = x - 19*q, i.e. the remainder x % 19.
            subl    %eax, %ecx
            # Shuffle the remainder into %esi, the second integer argument register.
            movl    %ecx, %edx
            movl    %edx, %esi
            # .LC0 is defined outside this listing - presumably the format string.
            movl    $.LC0, %edi
            # %al = 0: no vector registers are used by this variadic call.
            movl    $0, %eax
            call    printf
            # Return value 0.
            movl    $0, %eax
            leave
            .cfi_def_cfa 7, 8
            ret
            .cfi_endproc
-Os:

    # main as emitted at -Os (AT&T syntax: op src, dst).
    # Computes x % 19 with a real idivl, x being the first integer argument
    # (%edi), and hands the remainder to __printf_chk as its third argument.
    main:
    .LFB13:
            .cfi_startproc
            # 8-byte push used purely as a stack adjustment (it is popped into
            # %rdx at the end); presumably keeps %rsp 16-byte aligned at the call.
            pushq   %rax
            .cfi_def_cfa_offset 16
            # Dividend x into %eax, divisor 19 into %ecx.
            movl    %edi, %eax
            movl    $19, %ecx
            # Sign-extend %eax into %edx:%eax, as idivl requires.
            cltd
            # Call arguments are set up around the divide: %esi = .LC0 (defined
            # outside this listing - presumably the format string), %edi = 1.
            movl    $.LC0, %esi
            movl    $1, %edi
            # %eax = quotient, %edx = remainder; %edx is already the third
            # integer argument register, so the remainder needs no further move.
            idivl   %ecx
            # %al = 0: no vector registers are used by this variadic call.
            xorl    %eax, %eax
            call    __printf_chk
            # Return value 0.
            xorl    %eax, %eax
            # Undo the entry push; the popped value is discarded.
            popq    %rdx
            .cfi_def_cfa_offset 8
            ret
            .cfi_endproc
So it kind of performs an optimization with all optimizations disabled that it doesn't perform when optimizing for size. Or rather, the multiply-and-shift version is the default codegen, and -Os switches to the shorter idiv. Interesting.