|
In Rust this looks rather painful. Might I suggest trying this in Julia? It might be a good comparison of performance, ease of use, and readability. Julia does a very nice job of compiling directly to SIMD instructions and lets you inspect the low-level code generated.
@inline function sin9_shaper(x)
c0 = 6.28308759
c1 = -41.33318707
c2 = 81.39900205
c3 = -74.66884436
c4 = 33.15324345
a = abs(x - round(x)) - 0.25
a2 = a * a
((((a2 * c4 + c3) * a2 + c2) * a2 + c1) * a2 + c0) * a
end

function gen_sinwave(freq, init=0.0, step=0.1)
wave = [sin9_shaper(x) for x = init:step:freq]
end

julia> @code_native gen_sinwave(1113.0);
.section __TEXT,__text,regular,pure_instructions
; Function gen_sinwave {
; Location: REPL[60]:2
pushl %ebx
decl %eax
subl $48, %esp
vmovaps %xmm0, %xmm2
; Function gen_sinwave; {
; Location: REPL[60]:2
decl %eax
movl $769501344, %eax ## imm = 0x2DDDA8A0
addl %eax, (%eax)
addb %al, (%eax)
decl %eax
movl $773805120, %ecx ## imm = 0x2E1F5440
addl %eax, (%eax)
addb %al, (%eax)
vmovsd (%ecx), %xmm1 ## xmm1 = mem[0],zero
decl %eax
movl %esp, %ebx
vxorps %xmm0, %xmm0, %xmm0
decl %eax
movl %ebx, %edi
calll %eax
decl %eax
movl $769544608, %eax ## imm = 0x2DDE51A0
addl %eax, (%eax)
addb %al, (%eax)
decl %eax
movl %ebx, %edi
calll %eax
;}
decl %eax
addl $48, %esp
popl %ebx
retl
nopw %cs:(%eax,%eax)
;} end # module |
What you wrote does not guarantee vectorization; it just relies on autovectorization.
Rust already does autovectorization magically behind the scenes thanks to LLVM (which Julia also uses), but explicit SIMD makes it a guarantee.