| HN Mirror

Partial specialization specifically. Match some patterns and convert them to something else. For example:

  // Leaf operand of the expression templates: a plain aggregate around a double.
  struct F {
    double x;
  };
  // Tag for the operation an Expr node stands for; used as a non-type
  // template argument. Kept unscoped because Add/Mul are named unqualified.
  enum Op {
    Add = 0,
    Mul = 1
  };
  // Base case of evaluation: an F evaluates to the double it stores.
  auto eval(F x) {
    const double value = x.x;
    return value;
  }
  // Expression node for a binary operation `op` over operand types L and R.
  // Only declared here; the partial specializations below supply the definitions.
  template <class L, class R, Op op>
  struct Expr;
  // Addition node: stores both operands by value. Its eval() evaluates each
  // operand through an unqualified eval call and returns the sum.
  template <class Lhs, class Rhs>
  struct Expr<Lhs, Rhs, Add> {
    Lhs l;
    Rhs r;
    friend auto eval(Expr node) {
      return eval(node.l) + eval(node.r);
    }
  };
  // Multiplication node: stores both operands by value. Its eval() evaluates
  // each operand through an unqualified eval call and returns the product.
  template <class Lhs, class Rhs>
  struct Expr<Lhs, Rhs, Mul> {
    Lhs l;
    Rhs r;
    friend auto eval(Expr node) {
      return eval(node.l) * eval(node.r);
    }
  };
  // Pattern match for "(a * b) + c": an Add node whose left operand is itself
  // a Mul node. Instead of evaluating the product and the sum separately,
  // eval() passes the two factors and the addend to a single fma call.
  // NOTE(review): fma is called unqualified and no include is visible in this
  // snippet; presumably it is the C math library's fma (<math.h>) - confirm.
  template <class MulLhs, class MulRhs, class Addend>
  struct Expr<Expr<MulLhs, MulRhs, Mul>, Addend, Add> {
    Expr<MulLhs, MulRhs, Mul> l;
    Addend r;
    friend auto eval(Expr node) {
      return fma(eval(node.l.l), eval(node.l.r), eval(node.r));
    }
  };
  // Builds an addition node from two operands, both captured by value.
  // The trailing return type drops this overload (SFINAE) unless eval() is
  // callable on both operands. Without that constraint the template matched
  // every class or enum operand pair exactly, so it took over `+` for
  // unrelated types (e.g. `Add + 1` produced an Expr instead of an int).
  // The result type for valid operands is unchanged: Expr<L, R, Add>.
  template <class L, class R>
  auto operator +(L l, R r)
      -> decltype(void(eval(l)), void(eval(r)), Expr<L, R, Add>{l, r}) {
    return Expr<L, R, Add>{l, r};
  }
  // Builds a multiplication node from two operands, both captured by value.
  // The trailing return type drops this overload (SFINAE) unless eval() is
  // callable on both operands. Without that constraint the template matched
  // every class or enum operand pair exactly, so it took over `*` for
  // unrelated types (e.g. `Mul * 2` produced an Expr instead of an int).
  // The result type for valid operands is unchanged: Expr<L, R, Mul>.
  template <class L, class R>
  auto operator *(L l, R r)
      -> decltype(void(eval(l)), void(eval(r)), Expr<L, R, Mul>{l, r}) {
    return Expr<L, R, Mul>{l, r};
  }

  // Evaluates x * y + z. The expression has type Expr<Expr<F, F, Mul>, F, Add>,
  // which matches the "(a * b) + c" specialization, so eval() goes through fma.
  double optimized(F x, F y, F z) {
    auto product = x * y;
    auto sum = product + z;
    return eval(sum);
  }
  // Evaluates x + y * z. Here the Mul node is the right operand of the Add
  // node, so only the generic Add specialization matches: eval() multiplies
  // first and then adds.
  double non_optimized(F x, F y, F z) {
    auto product = y * z;
    auto sum = x + product;
    return eval(sum);
  }

`optimized` always generates a call to fma; `non_optimized` does not. Use -O1 to see the difference (will inline trivial functions, but will not do other optimizations). -O0 also generates the fma, but it is lost in the noise.

The magic happens by specifically matching the pattern Expr<Expr<L, R, Mul>, R2, Add>; try to add a rule to optimize x+y*z as well.