| HN Mirror

To be fair, even in standard C11 you can do a bit better than the CPU manufacturer’s syntax

  #define vaddv(A, B) _Generic((A),
      int8x8_t:    vaddv_s8((A), (B)),
      uint8x8_t:   vaddv_u8((A), (B)),
      int8x16_t:   vaddvq_s8((A), (B)),
      uint8x16_t:  vaddvq_u8((A), (B)),
      int16x4_t:   vaddv_s16((A), (B)),
      uint16x4_t:  vaddv_u16((A), (B)),
      int16x8_t:   vaddvq_s16((A), (B)),
      uint16x8_t:  vaddvq_u16((A), (B)),
      int32x2_t:   vaddv_s32((A), (B)),
      uint32x2_t:  vaddv_u32((A), (B)),
      float32x2_t: vaddv_f32((A), (B)),
      int32x4_t:   vaddvq_s32((A), (B)),
      uint32x4_t:  vaddvq_u32((A), (B)),
      float32x4_t: vaddvq_f32((A), (B)),
      int64x2_t:   vaddvq_s64((A), (B)),
      uint64x2_t:  vaddvq_u64((A), (B)),
      float64x2_t: vaddvq_f64((A), (B)))

while in GNU C you can in fact use normal arithmetic and indexing (but not swizzles) on vector types.