Falkner and Boettcher, Appendix B, 2

Percentage Accurate: 100.0% → 99.7%
Time: 6.7s
Alternatives: 5
Speedup: 1.6×

Specification

?
\[\begin{array}{l} \\ \left(\frac{\sqrt{2}}{4} \cdot \sqrt{1 - 3 \cdot \left(v \cdot v\right)}\right) \cdot \left(1 - v \cdot v\right) \end{array} \]
(FPCore (v)
 :precision binary64
 (* (* (/ (sqrt 2.0) 4.0) (sqrt (- 1.0 (* 3.0 (* v v))))) (- 1.0 (* v v))))
double code(double v) {
	return ((sqrt(2.0) / 4.0) * sqrt((1.0 - (3.0 * (v * v))))) * (1.0 - (v * v));
}
real(8) function code(v)
    real(8), intent (in) :: v
    code = ((sqrt(2.0d0) / 4.0d0) * sqrt((1.0d0 - (3.0d0 * (v * v))))) * (1.0d0 - (v * v))
end function
public static double code(double v) {
	return ((Math.sqrt(2.0) / 4.0) * Math.sqrt((1.0 - (3.0 * (v * v))))) * (1.0 - (v * v));
}
def code(v):
	return ((math.sqrt(2.0) / 4.0) * math.sqrt((1.0 - (3.0 * (v * v))))) * (1.0 - (v * v))
function code(v)
	return Float64(Float64(Float64(sqrt(2.0) / 4.0) * sqrt(Float64(1.0 - Float64(3.0 * Float64(v * v))))) * Float64(1.0 - Float64(v * v)))
end
function tmp = code(v)
	tmp = ((sqrt(2.0) / 4.0) * sqrt((1.0 - (3.0 * (v * v))))) * (1.0 - (v * v));
end
code[v_] := N[(N[(N[(N[Sqrt[2.0], $MachinePrecision] / 4.0), $MachinePrecision] * N[Sqrt[N[(1.0 - N[(3.0 * N[(v * v), $MachinePrecision]), $MachinePrecision]), $MachinePrecision]], $MachinePrecision]), $MachinePrecision] * N[(1.0 - N[(v * v), $MachinePrecision]), $MachinePrecision]), $MachinePrecision]
\begin{array}{l}

\\
\left(\frac{\sqrt{2}}{4} \cdot \sqrt{1 - 3 \cdot \left(v \cdot v\right)}\right) \cdot \left(1 - v \cdot v\right)
\end{array}

Sampling outcomes in binary64 precision:

Local Percentage Accuracy vs ?

The average percentage accuracy by input value. The horizontal axis shows the value of an input variable; the variable is chosen in the title. The vertical axis is accuracy; higher is better. Red represents the original program, while blue represents Herbie's suggestion. These can be toggled with the buttons below the plot. The line is an average, while dots represent individual samples.

Accuracy vs Speed

Herbie found 5 alternatives:

Alternative | Accuracy | Speedup
The accuracy (vertical axis) and speed (horizontal axis) of each alternative. Up and to the right is better. The red square shows the initial program, and each blue circle shows an alternative. The line shows the best available speed-accuracy tradeoffs.

Initial Program: 100.0% accurate, 1.0× speedup

\[\begin{array}{l} \\ \left(\frac{\sqrt{2}}{4} \cdot \sqrt{1 - 3 \cdot \left(v \cdot v\right)}\right) \cdot \left(1 - v \cdot v\right) \end{array} \]
(FPCore (v)
 :precision binary64
 (* (* (/ (sqrt 2.0) 4.0) (sqrt (- 1.0 (* 3.0 (* v v))))) (- 1.0 (* v v))))
double code(double v) {
	return ((sqrt(2.0) / 4.0) * sqrt((1.0 - (3.0 * (v * v))))) * (1.0 - (v * v));
}
real(8) function code(v)
    real(8), intent (in) :: v
    code = ((sqrt(2.0d0) / 4.0d0) * sqrt((1.0d0 - (3.0d0 * (v * v))))) * (1.0d0 - (v * v))
end function
public static double code(double v) {
	return ((Math.sqrt(2.0) / 4.0) * Math.sqrt((1.0 - (3.0 * (v * v))))) * (1.0 - (v * v));
}
def code(v):
	return ((math.sqrt(2.0) / 4.0) * math.sqrt((1.0 - (3.0 * (v * v))))) * (1.0 - (v * v))
function code(v)
	return Float64(Float64(Float64(sqrt(2.0) / 4.0) * sqrt(Float64(1.0 - Float64(3.0 * Float64(v * v))))) * Float64(1.0 - Float64(v * v)))
end
function tmp = code(v)
	tmp = ((sqrt(2.0) / 4.0) * sqrt((1.0 - (3.0 * (v * v))))) * (1.0 - (v * v));
end
code[v_] := N[(N[(N[(N[Sqrt[2.0], $MachinePrecision] / 4.0), $MachinePrecision] * N[Sqrt[N[(1.0 - N[(3.0 * N[(v * v), $MachinePrecision]), $MachinePrecision]), $MachinePrecision]], $MachinePrecision]), $MachinePrecision] * N[(1.0 - N[(v * v), $MachinePrecision]), $MachinePrecision]), $MachinePrecision]
\begin{array}{l}

\\
\left(\frac{\sqrt{2}}{4} \cdot \sqrt{1 - 3 \cdot \left(v \cdot v\right)}\right) \cdot \left(1 - v \cdot v\right)
\end{array}

Alternative 1: 99.7% accurate, 0.7× speedup

\[\begin{array}{l} \\ \mathsf{fma}\left(\mathsf{fma}\left(\sqrt{2} \cdot \mathsf{fma}\left(-1.125, v \cdot v, -1.5\right), v \cdot v, \sqrt{2}\right) \cdot \left(v \cdot -0.25\right), v, 0.25 \cdot \sqrt{\mathsf{fma}\left(-6, v \cdot v, 2\right)}\right) \end{array} \]
(FPCore (v)
 :precision binary64
 (fma
  (*
   (fma (* (sqrt 2.0) (fma -1.125 (* v v) -1.5)) (* v v) (sqrt 2.0))
   (* v -0.25))
  v
  (* 0.25 (sqrt (fma -6.0 (* v v) 2.0)))))
double code(double v) {
	return fma((fma((sqrt(2.0) * fma(-1.125, (v * v), -1.5)), (v * v), sqrt(2.0)) * (v * -0.25)), v, (0.25 * sqrt(fma(-6.0, (v * v), 2.0))));
}
function code(v)
	return fma(Float64(fma(Float64(sqrt(2.0) * fma(-1.125, Float64(v * v), -1.5)), Float64(v * v), sqrt(2.0)) * Float64(v * -0.25)), v, Float64(0.25 * sqrt(fma(-6.0, Float64(v * v), 2.0))))
end
code[v_] := N[(N[(N[(N[(N[Sqrt[2.0], $MachinePrecision] * N[(-1.125 * N[(v * v), $MachinePrecision] + -1.5), $MachinePrecision]), $MachinePrecision] * N[(v * v), $MachinePrecision] + N[Sqrt[2.0], $MachinePrecision]), $MachinePrecision] * N[(v * -0.25), $MachinePrecision]), $MachinePrecision] * v + N[(0.25 * N[Sqrt[N[(-6.0 * N[(v * v), $MachinePrecision] + 2.0), $MachinePrecision]], $MachinePrecision]), $MachinePrecision]), $MachinePrecision]
\begin{array}{l}

\\
\mathsf{fma}\left(\mathsf{fma}\left(\sqrt{2} \cdot \mathsf{fma}\left(-1.125, v \cdot v, -1.5\right), v \cdot v, \sqrt{2}\right) \cdot \left(v \cdot -0.25\right), v, 0.25 \cdot \sqrt{\mathsf{fma}\left(-6, v \cdot v, 2\right)}\right)
\end{array}
Derivation
  1. Initial program 100.0%

    \[\left(\frac{\sqrt{2}}{4} \cdot \sqrt{1 - 3 \cdot \left(v \cdot v\right)}\right) \cdot \left(1 - v \cdot v\right) \]
  2. Add Preprocessing
  3. Applied rewrites: 100.0%

    \[\leadsto \color{blue}{\mathsf{fma}\left(\left(\sqrt{\mathsf{fma}\left(-3, v \cdot v, 1\right) \cdot 2} \cdot 0.25\right) \cdot \left(-v\right), v, \sqrt{\mathsf{fma}\left(-3, v \cdot v, 1\right) \cdot 2} \cdot 0.25\right)} \]
  4. Taylor expanded in v around 0

    \[\leadsto \mathsf{fma}\left(\left(\color{blue}{\left(\sqrt{2} + {v}^{2} \cdot \left(\frac{-3}{2} \cdot \sqrt{2} + \frac{-9}{8} \cdot \left({v}^{2} \cdot \sqrt{2}\right)\right)\right)} \cdot \frac{1}{4}\right) \cdot \left(-v\right), v, \sqrt{\mathsf{fma}\left(-3, v \cdot v, 1\right) \cdot 2} \cdot \frac{1}{4}\right) \]
  5. Step-by-step derivation
    1. +-commutative: N/A

      \[\leadsto \mathsf{fma}\left(\left(\color{blue}{\left({v}^{2} \cdot \left(\frac{-3}{2} \cdot \sqrt{2} + \frac{-9}{8} \cdot \left({v}^{2} \cdot \sqrt{2}\right)\right) + \sqrt{2}\right)} \cdot \frac{1}{4}\right) \cdot \left(-v\right), v, \sqrt{\mathsf{fma}\left(-3, v \cdot v, 1\right) \cdot 2} \cdot \frac{1}{4}\right) \]
    2. *-commutative: N/A

      \[\leadsto \mathsf{fma}\left(\left(\left(\color{blue}{\left(\frac{-3}{2} \cdot \sqrt{2} + \frac{-9}{8} \cdot \left({v}^{2} \cdot \sqrt{2}\right)\right) \cdot {v}^{2}} + \sqrt{2}\right) \cdot \frac{1}{4}\right) \cdot \left(-v\right), v, \sqrt{\mathsf{fma}\left(-3, v \cdot v, 1\right) \cdot 2} \cdot \frac{1}{4}\right) \]
    3. lower-fma.f64: N/A

      \[\leadsto \mathsf{fma}\left(\left(\color{blue}{\mathsf{fma}\left(\frac{-3}{2} \cdot \sqrt{2} + \frac{-9}{8} \cdot \left({v}^{2} \cdot \sqrt{2}\right), {v}^{2}, \sqrt{2}\right)} \cdot \frac{1}{4}\right) \cdot \left(-v\right), v, \sqrt{\mathsf{fma}\left(-3, v \cdot v, 1\right) \cdot 2} \cdot \frac{1}{4}\right) \]
    4. associate-*r*: N/A

      \[\leadsto \mathsf{fma}\left(\left(\mathsf{fma}\left(\frac{-3}{2} \cdot \sqrt{2} + \color{blue}{\left(\frac{-9}{8} \cdot {v}^{2}\right) \cdot \sqrt{2}}, {v}^{2}, \sqrt{2}\right) \cdot \frac{1}{4}\right) \cdot \left(-v\right), v, \sqrt{\mathsf{fma}\left(-3, v \cdot v, 1\right) \cdot 2} \cdot \frac{1}{4}\right) \]
    5. distribute-rgt-out: N/A

      \[\leadsto \mathsf{fma}\left(\left(\mathsf{fma}\left(\color{blue}{\sqrt{2} \cdot \left(\frac{-3}{2} + \frac{-9}{8} \cdot {v}^{2}\right)}, {v}^{2}, \sqrt{2}\right) \cdot \frac{1}{4}\right) \cdot \left(-v\right), v, \sqrt{\mathsf{fma}\left(-3, v \cdot v, 1\right) \cdot 2} \cdot \frac{1}{4}\right) \]
    6. +-commutative: N/A

      \[\leadsto \mathsf{fma}\left(\left(\mathsf{fma}\left(\sqrt{2} \cdot \color{blue}{\left(\frac{-9}{8} \cdot {v}^{2} + \frac{-3}{2}\right)}, {v}^{2}, \sqrt{2}\right) \cdot \frac{1}{4}\right) \cdot \left(-v\right), v, \sqrt{\mathsf{fma}\left(-3, v \cdot v, 1\right) \cdot 2} \cdot \frac{1}{4}\right) \]
    7. metadata-eval: N/A

      \[\leadsto \mathsf{fma}\left(\left(\mathsf{fma}\left(\sqrt{2} \cdot \left(\frac{-9}{8} \cdot {v}^{2} + \color{blue}{\left(\mathsf{neg}\left(\frac{3}{2}\right)\right)}\right), {v}^{2}, \sqrt{2}\right) \cdot \frac{1}{4}\right) \cdot \left(-v\right), v, \sqrt{\mathsf{fma}\left(-3, v \cdot v, 1\right) \cdot 2} \cdot \frac{1}{4}\right) \]
    8. sub-neg: N/A

      \[\leadsto \mathsf{fma}\left(\left(\mathsf{fma}\left(\sqrt{2} \cdot \color{blue}{\left(\frac{-9}{8} \cdot {v}^{2} - \frac{3}{2}\right)}, {v}^{2}, \sqrt{2}\right) \cdot \frac{1}{4}\right) \cdot \left(-v\right), v, \sqrt{\mathsf{fma}\left(-3, v \cdot v, 1\right) \cdot 2} \cdot \frac{1}{4}\right) \]
    9. lower-*.f64: N/A

      \[\leadsto \mathsf{fma}\left(\left(\mathsf{fma}\left(\color{blue}{\sqrt{2} \cdot \left(\frac{-9}{8} \cdot {v}^{2} - \frac{3}{2}\right)}, {v}^{2}, \sqrt{2}\right) \cdot \frac{1}{4}\right) \cdot \left(-v\right), v, \sqrt{\mathsf{fma}\left(-3, v \cdot v, 1\right) \cdot 2} \cdot \frac{1}{4}\right) \]
    10. lower-sqrt.f64: N/A

      \[\leadsto \mathsf{fma}\left(\left(\mathsf{fma}\left(\color{blue}{\sqrt{2}} \cdot \left(\frac{-9}{8} \cdot {v}^{2} - \frac{3}{2}\right), {v}^{2}, \sqrt{2}\right) \cdot \frac{1}{4}\right) \cdot \left(-v\right), v, \sqrt{\mathsf{fma}\left(-3, v \cdot v, 1\right) \cdot 2} \cdot \frac{1}{4}\right) \]
    11. sub-neg: N/A

      \[\leadsto \mathsf{fma}\left(\left(\mathsf{fma}\left(\sqrt{2} \cdot \color{blue}{\left(\frac{-9}{8} \cdot {v}^{2} + \left(\mathsf{neg}\left(\frac{3}{2}\right)\right)\right)}, {v}^{2}, \sqrt{2}\right) \cdot \frac{1}{4}\right) \cdot \left(-v\right), v, \sqrt{\mathsf{fma}\left(-3, v \cdot v, 1\right) \cdot 2} \cdot \frac{1}{4}\right) \]
    12. metadata-eval: N/A

      \[\leadsto \mathsf{fma}\left(\left(\mathsf{fma}\left(\sqrt{2} \cdot \left(\frac{-9}{8} \cdot {v}^{2} + \color{blue}{\frac{-3}{2}}\right), {v}^{2}, \sqrt{2}\right) \cdot \frac{1}{4}\right) \cdot \left(-v\right), v, \sqrt{\mathsf{fma}\left(-3, v \cdot v, 1\right) \cdot 2} \cdot \frac{1}{4}\right) \]
    13. lower-fma.f64: N/A

      \[\leadsto \mathsf{fma}\left(\left(\mathsf{fma}\left(\sqrt{2} \cdot \color{blue}{\mathsf{fma}\left(\frac{-9}{8}, {v}^{2}, \frac{-3}{2}\right)}, {v}^{2}, \sqrt{2}\right) \cdot \frac{1}{4}\right) \cdot \left(-v\right), v, \sqrt{\mathsf{fma}\left(-3, v \cdot v, 1\right) \cdot 2} \cdot \frac{1}{4}\right) \]
    14. unpow2: N/A

      \[\leadsto \mathsf{fma}\left(\left(\mathsf{fma}\left(\sqrt{2} \cdot \mathsf{fma}\left(\frac{-9}{8}, \color{blue}{v \cdot v}, \frac{-3}{2}\right), {v}^{2}, \sqrt{2}\right) \cdot \frac{1}{4}\right) \cdot \left(-v\right), v, \sqrt{\mathsf{fma}\left(-3, v \cdot v, 1\right) \cdot 2} \cdot \frac{1}{4}\right) \]
    15. lower-*.f64: N/A

      \[\leadsto \mathsf{fma}\left(\left(\mathsf{fma}\left(\sqrt{2} \cdot \mathsf{fma}\left(\frac{-9}{8}, \color{blue}{v \cdot v}, \frac{-3}{2}\right), {v}^{2}, \sqrt{2}\right) \cdot \frac{1}{4}\right) \cdot \left(-v\right), v, \sqrt{\mathsf{fma}\left(-3, v \cdot v, 1\right) \cdot 2} \cdot \frac{1}{4}\right) \]
    16. unpow2: N/A

      \[\leadsto \mathsf{fma}\left(\left(\mathsf{fma}\left(\sqrt{2} \cdot \mathsf{fma}\left(\frac{-9}{8}, v \cdot v, \frac{-3}{2}\right), \color{blue}{v \cdot v}, \sqrt{2}\right) \cdot \frac{1}{4}\right) \cdot \left(-v\right), v, \sqrt{\mathsf{fma}\left(-3, v \cdot v, 1\right) \cdot 2} \cdot \frac{1}{4}\right) \]
    17. lower-*.f64: N/A

      \[\leadsto \mathsf{fma}\left(\left(\mathsf{fma}\left(\sqrt{2} \cdot \mathsf{fma}\left(\frac{-9}{8}, v \cdot v, \frac{-3}{2}\right), \color{blue}{v \cdot v}, \sqrt{2}\right) \cdot \frac{1}{4}\right) \cdot \left(-v\right), v, \sqrt{\mathsf{fma}\left(-3, v \cdot v, 1\right) \cdot 2} \cdot \frac{1}{4}\right) \]
    18. lower-sqrt.f64: 100.0

      \[\leadsto \mathsf{fma}\left(\left(\mathsf{fma}\left(\sqrt{2} \cdot \mathsf{fma}\left(-1.125, v \cdot v, -1.5\right), v \cdot v, \color{blue}{\sqrt{2}}\right) \cdot 0.25\right) \cdot \left(-v\right), v, \sqrt{\mathsf{fma}\left(-3, v \cdot v, 1\right) \cdot 2} \cdot 0.25\right) \]
  6. Applied rewrites: 100.0%

    \[\leadsto \mathsf{fma}\left(\left(\color{blue}{\mathsf{fma}\left(\sqrt{2} \cdot \mathsf{fma}\left(-1.125, v \cdot v, -1.5\right), v \cdot v, \sqrt{2}\right)} \cdot 0.25\right) \cdot \left(-v\right), v, \sqrt{\mathsf{fma}\left(-3, v \cdot v, 1\right) \cdot 2} \cdot 0.25\right) \]
  7. Taylor expanded in v around 0

    \[\leadsto \mathsf{fma}\left(\left(\mathsf{fma}\left(\sqrt{2} \cdot \mathsf{fma}\left(\frac{-9}{8}, v \cdot v, \frac{-3}{2}\right), v \cdot v, \sqrt{2}\right) \cdot \frac{1}{4}\right) \cdot \left(-v\right), v, \sqrt{\color{blue}{2 + -6 \cdot {v}^{2}}} \cdot \frac{1}{4}\right) \]
  8. Step-by-step derivation
    1. +-commutative: N/A

      \[\leadsto \mathsf{fma}\left(\left(\mathsf{fma}\left(\sqrt{2} \cdot \mathsf{fma}\left(\frac{-9}{8}, v \cdot v, \frac{-3}{2}\right), v \cdot v, \sqrt{2}\right) \cdot \frac{1}{4}\right) \cdot \left(-v\right), v, \sqrt{\color{blue}{-6 \cdot {v}^{2} + 2}} \cdot \frac{1}{4}\right) \]
    2. lower-fma.f64: N/A

      \[\leadsto \mathsf{fma}\left(\left(\mathsf{fma}\left(\sqrt{2} \cdot \mathsf{fma}\left(\frac{-9}{8}, v \cdot v, \frac{-3}{2}\right), v \cdot v, \sqrt{2}\right) \cdot \frac{1}{4}\right) \cdot \left(-v\right), v, \sqrt{\color{blue}{\mathsf{fma}\left(-6, {v}^{2}, 2\right)}} \cdot \frac{1}{4}\right) \]
    3. unpow2: N/A

      \[\leadsto \mathsf{fma}\left(\left(\mathsf{fma}\left(\sqrt{2} \cdot \mathsf{fma}\left(\frac{-9}{8}, v \cdot v, \frac{-3}{2}\right), v \cdot v, \sqrt{2}\right) \cdot \frac{1}{4}\right) \cdot \left(-v\right), v, \sqrt{\mathsf{fma}\left(-6, \color{blue}{v \cdot v}, 2\right)} \cdot \frac{1}{4}\right) \]
    4. lower-*.f64: 100.0

      \[\leadsto \mathsf{fma}\left(\left(\mathsf{fma}\left(\sqrt{2} \cdot \mathsf{fma}\left(-1.125, v \cdot v, -1.5\right), v \cdot v, \sqrt{2}\right) \cdot 0.25\right) \cdot \left(-v\right), v, \sqrt{\mathsf{fma}\left(-6, \color{blue}{v \cdot v}, 2\right)} \cdot 0.25\right) \]
  9. Applied rewrites: 100.0%

    \[\leadsto \mathsf{fma}\left(\left(\mathsf{fma}\left(\sqrt{2} \cdot \mathsf{fma}\left(-1.125, v \cdot v, -1.5\right), v \cdot v, \sqrt{2}\right) \cdot 0.25\right) \cdot \left(-v\right), v, \sqrt{\color{blue}{\mathsf{fma}\left(-6, v \cdot v, 2\right)}} \cdot 0.25\right) \]
  10. Step-by-step derivation
    1. Applied rewrites: 100.0%

      \[\leadsto \color{blue}{\mathsf{fma}\left(\left(-0.25 \cdot v\right) \cdot \mathsf{fma}\left(\mathsf{fma}\left(-1.125, v \cdot v, -1.5\right) \cdot \sqrt{2}, v \cdot v, \sqrt{2}\right), v, \sqrt{\mathsf{fma}\left(-6, v \cdot v, 2\right)} \cdot 0.25\right)} \]
    2. Final simplification: 100.0%

      \[\leadsto \mathsf{fma}\left(\mathsf{fma}\left(\sqrt{2} \cdot \mathsf{fma}\left(-1.125, v \cdot v, -1.5\right), v \cdot v, \sqrt{2}\right) \cdot \left(v \cdot -0.25\right), v, 0.25 \cdot \sqrt{\mathsf{fma}\left(-6, v \cdot v, 2\right)}\right) \]
    3. Add Preprocessing

    Alternative 2: 100.0% accurate, 0.8× speedup

    \[\begin{array}{l} \\ \begin{array}{l} t_0 := \sqrt{\mathsf{fma}\left(-3, v \cdot v, 1\right) \cdot 2}\\ \mathsf{fma}\left(\left(\left(-0.25\right) \cdot t\_0\right) \cdot v, v, t\_0 \cdot 0.25\right) \end{array} \end{array} \]
    (FPCore (v)
     :precision binary64
     (let* ((t_0 (sqrt (* (fma -3.0 (* v v) 1.0) 2.0))))
       (fma (* (* (- 0.25) t_0) v) v (* t_0 0.25))))
    double code(double v) {
    	double t_0 = sqrt((fma(-3.0, (v * v), 1.0) * 2.0));
    	return fma(((-0.25 * t_0) * v), v, (t_0 * 0.25));
    }
    
    function code(v)
    	t_0 = sqrt(Float64(fma(-3.0, Float64(v * v), 1.0) * 2.0))
    	return fma(Float64(Float64(Float64(-0.25) * t_0) * v), v, Float64(t_0 * 0.25))
    end
    
    code[v_] := Block[{t$95$0 = N[Sqrt[N[(N[(-3.0 * N[(v * v), $MachinePrecision] + 1.0), $MachinePrecision] * 2.0), $MachinePrecision]], $MachinePrecision]}, N[(N[(N[((-0.25) * t$95$0), $MachinePrecision] * v), $MachinePrecision] * v + N[(t$95$0 * 0.25), $MachinePrecision]), $MachinePrecision]]
    
    \begin{array}{l}
    
    \\
    \begin{array}{l}
    t_0 := \sqrt{\mathsf{fma}\left(-3, v \cdot v, 1\right) \cdot 2}\\
    \mathsf{fma}\left(\left(\left(-0.25\right) \cdot t\_0\right) \cdot v, v, t\_0 \cdot 0.25\right)
    \end{array}
    \end{array}
    
    Derivation
    1. Initial program 100.0%

      \[\left(\frac{\sqrt{2}}{4} \cdot \sqrt{1 - 3 \cdot \left(v \cdot v\right)}\right) \cdot \left(1 - v \cdot v\right) \]
    2. Add Preprocessing
    3. Applied rewrites: 100.0%

      \[\leadsto \color{blue}{\mathsf{fma}\left(\left(\sqrt{\mathsf{fma}\left(-3, v \cdot v, 1\right) \cdot 2} \cdot 0.25\right) \cdot \left(-v\right), v, \sqrt{\mathsf{fma}\left(-3, v \cdot v, 1\right) \cdot 2} \cdot 0.25\right)} \]
    4. Final simplification: 100.0%

      \[\leadsto \mathsf{fma}\left(\left(\left(-0.25\right) \cdot \sqrt{\mathsf{fma}\left(-3, v \cdot v, 1\right) \cdot 2}\right) \cdot v, v, \sqrt{\mathsf{fma}\left(-3, v \cdot v, 1\right) \cdot 2} \cdot 0.25\right) \]
    5. Add Preprocessing

    Alternative 3: 100.0% accurate, 1.6× speedup

    \[\begin{array}{l} \\ \left(1 - v \cdot v\right) \cdot \left(0.25 \cdot \sqrt{\mathsf{fma}\left(-6, v \cdot v, 2\right)}\right) \end{array} \]
    (FPCore (v)
     :precision binary64
     (* (- 1.0 (* v v)) (* 0.25 (sqrt (fma -6.0 (* v v) 2.0)))))
    double code(double v) {
    	return (1.0 - (v * v)) * (0.25 * sqrt(fma(-6.0, (v * v), 2.0)));
    }
    
    function code(v)
    	return Float64(Float64(1.0 - Float64(v * v)) * Float64(0.25 * sqrt(fma(-6.0, Float64(v * v), 2.0))))
    end
    
    code[v_] := N[(N[(1.0 - N[(v * v), $MachinePrecision]), $MachinePrecision] * N[(0.25 * N[Sqrt[N[(-6.0 * N[(v * v), $MachinePrecision] + 2.0), $MachinePrecision]], $MachinePrecision]), $MachinePrecision]), $MachinePrecision]
    
    \begin{array}{l}
    
    \\
    \left(1 - v \cdot v\right) \cdot \left(0.25 \cdot \sqrt{\mathsf{fma}\left(-6, v \cdot v, 2\right)}\right)
    \end{array}
    
    Derivation
    1. Initial program 100.0%

      \[\left(\frac{\sqrt{2}}{4} \cdot \sqrt{1 - 3 \cdot \left(v \cdot v\right)}\right) \cdot \left(1 - v \cdot v\right) \]
    2. Add Preprocessing
    3. Applied rewrites: 100.0%

      \[\leadsto \color{blue}{\left(\sqrt{\mathsf{fma}\left(-3, v \cdot v, 1\right) \cdot 2} \cdot 0.25\right)} \cdot \left(1 - v \cdot v\right) \]
    4. Taylor expanded in v around 0

      \[\leadsto \left(\sqrt{\color{blue}{2 + -6 \cdot {v}^{2}}} \cdot \frac{1}{4}\right) \cdot \left(1 - v \cdot v\right) \]
    5. Step-by-step derivation
      1. +-commutative: N/A

        \[\leadsto \left(\sqrt{\color{blue}{-6 \cdot {v}^{2} + 2}} \cdot \frac{1}{4}\right) \cdot \left(1 - v \cdot v\right) \]
      2. lower-fma.f64: N/A

        \[\leadsto \left(\sqrt{\color{blue}{\mathsf{fma}\left(-6, {v}^{2}, 2\right)}} \cdot \frac{1}{4}\right) \cdot \left(1 - v \cdot v\right) \]
      3. unpow2: N/A

        \[\leadsto \left(\sqrt{\mathsf{fma}\left(-6, \color{blue}{v \cdot v}, 2\right)} \cdot \frac{1}{4}\right) \cdot \left(1 - v \cdot v\right) \]
      4. lower-*.f64: 100.0

        \[\leadsto \left(\sqrt{\mathsf{fma}\left(-6, \color{blue}{v \cdot v}, 2\right)} \cdot 0.25\right) \cdot \left(1 - v \cdot v\right) \]
    6. Applied rewrites: 100.0%

      \[\leadsto \left(\sqrt{\color{blue}{\mathsf{fma}\left(-6, v \cdot v, 2\right)}} \cdot 0.25\right) \cdot \left(1 - v \cdot v\right) \]
    7. Final simplification: 100.0%

      \[\leadsto \left(1 - v \cdot v\right) \cdot \left(0.25 \cdot \sqrt{\mathsf{fma}\left(-6, v \cdot v, 2\right)}\right) \]
    8. Add Preprocessing

    Alternative 4: 99.4% accurate, 2.3× speedup

    \[\begin{array}{l} \\ \mathsf{fma}\left(-0.625 \cdot v, v, 0.25\right) \cdot \sqrt{2} \end{array} \]
    (FPCore (v) :precision binary64 (* (fma (* -0.625 v) v 0.25) (sqrt 2.0)))
    double code(double v) {
    	return fma((-0.625 * v), v, 0.25) * sqrt(2.0);
    }
    
    function code(v)
    	return Float64(fma(Float64(-0.625 * v), v, 0.25) * sqrt(2.0))
    end
    
    code[v_] := N[(N[(N[(-0.625 * v), $MachinePrecision] * v + 0.25), $MachinePrecision] * N[Sqrt[2.0], $MachinePrecision]), $MachinePrecision]
    
    \begin{array}{l}
    
    \\
    \mathsf{fma}\left(-0.625 \cdot v, v, 0.25\right) \cdot \sqrt{2}
    \end{array}
    
    Derivation
    1. Initial program 100.0%

      \[\left(\frac{\sqrt{2}}{4} \cdot \sqrt{1 - 3 \cdot \left(v \cdot v\right)}\right) \cdot \left(1 - v \cdot v\right) \]
    2. Add Preprocessing
    3. Applied rewrites: 100.0%

      \[\leadsto \color{blue}{\mathsf{fma}\left(\left(\sqrt{\mathsf{fma}\left(-3, v \cdot v, 1\right) \cdot 2} \cdot 0.25\right) \cdot \left(-v\right), v, \sqrt{\mathsf{fma}\left(-3, v \cdot v, 1\right) \cdot 2} \cdot 0.25\right)} \]
    4. Taylor expanded in v around 0

      \[\leadsto \color{blue}{\frac{1}{4} \cdot \sqrt{2} + {v}^{2} \cdot \left(\frac{-3}{8} \cdot \sqrt{2} + \frac{-1}{4} \cdot \sqrt{2}\right)} \]
    5. Step-by-step derivation
      1. +-commutative: N/A

        \[\leadsto \color{blue}{{v}^{2} \cdot \left(\frac{-3}{8} \cdot \sqrt{2} + \frac{-1}{4} \cdot \sqrt{2}\right) + \frac{1}{4} \cdot \sqrt{2}} \]
      2. distribute-rgt-out: N/A

        \[\leadsto {v}^{2} \cdot \color{blue}{\left(\sqrt{2} \cdot \left(\frac{-3}{8} + \frac{-1}{4}\right)\right)} + \frac{1}{4} \cdot \sqrt{2} \]
      3. metadata-eval: N/A

        \[\leadsto {v}^{2} \cdot \left(\sqrt{2} \cdot \color{blue}{\frac{-5}{8}}\right) + \frac{1}{4} \cdot \sqrt{2} \]
      4. associate-*r*: N/A

        \[\leadsto \color{blue}{\left({v}^{2} \cdot \sqrt{2}\right) \cdot \frac{-5}{8}} + \frac{1}{4} \cdot \sqrt{2} \]
      5. *-commutative: N/A

        \[\leadsto \color{blue}{\frac{-5}{8} \cdot \left({v}^{2} \cdot \sqrt{2}\right)} + \frac{1}{4} \cdot \sqrt{2} \]
      6. associate-*r*: N/A

        \[\leadsto \color{blue}{\left(\frac{-5}{8} \cdot {v}^{2}\right) \cdot \sqrt{2}} + \frac{1}{4} \cdot \sqrt{2} \]
      7. distribute-rgt-out: N/A

        \[\leadsto \color{blue}{\sqrt{2} \cdot \left(\frac{-5}{8} \cdot {v}^{2} + \frac{1}{4}\right)} \]
      8. lower-*.f64: N/A

        \[\leadsto \color{blue}{\sqrt{2} \cdot \left(\frac{-5}{8} \cdot {v}^{2} + \frac{1}{4}\right)} \]
      9. lower-sqrt.f64: N/A

        \[\leadsto \color{blue}{\sqrt{2}} \cdot \left(\frac{-5}{8} \cdot {v}^{2} + \frac{1}{4}\right) \]
      10. unpow2: N/A

        \[\leadsto \sqrt{2} \cdot \left(\frac{-5}{8} \cdot \color{blue}{\left(v \cdot v\right)} + \frac{1}{4}\right) \]
      11. associate-*r*: N/A

        \[\leadsto \sqrt{2} \cdot \left(\color{blue}{\left(\frac{-5}{8} \cdot v\right) \cdot v} + \frac{1}{4}\right) \]
      12. lower-fma.f64: N/A

        \[\leadsto \sqrt{2} \cdot \color{blue}{\mathsf{fma}\left(\frac{-5}{8} \cdot v, v, \frac{1}{4}\right)} \]
      13. lower-*.f64: 99.8

        \[\leadsto \sqrt{2} \cdot \mathsf{fma}\left(\color{blue}{-0.625 \cdot v}, v, 0.25\right) \]
    6. Applied rewrites: 99.8%

      \[\leadsto \color{blue}{\sqrt{2} \cdot \mathsf{fma}\left(-0.625 \cdot v, v, 0.25\right)} \]
    7. Final simplification: 99.8%

      \[\leadsto \mathsf{fma}\left(-0.625 \cdot v, v, 0.25\right) \cdot \sqrt{2} \]
    8. Add Preprocessing

    Alternative 5: 98.7% accurate, 3.9× speedup

    \[\begin{array}{l} \\ 0.25 \cdot \sqrt{2} \end{array} \]
    (FPCore (v) :precision binary64 (* 0.25 (sqrt 2.0)))
    double code(double v) {
    	return 0.25 * sqrt(2.0);
    }
    
    real(8) function code(v)
        real(8), intent (in) :: v
        code = 0.25d0 * sqrt(2.0d0)
    end function
    
    public static double code(double v) {
    	return 0.25 * Math.sqrt(2.0);
    }
    
    def code(v):
    	return 0.25 * math.sqrt(2.0)
    
    function code(v)
    	return Float64(0.25 * sqrt(2.0))
    end
    
    function tmp = code(v)
    	tmp = 0.25 * sqrt(2.0);
    end
    
    code[v_] := N[(0.25 * N[Sqrt[2.0], $MachinePrecision]), $MachinePrecision]
    
    \begin{array}{l}
    
    \\
    0.25 \cdot \sqrt{2}
    \end{array}
    
    Derivation
    1. Initial program 100.0%

      \[\left(\frac{\sqrt{2}}{4} \cdot \sqrt{1 - 3 \cdot \left(v \cdot v\right)}\right) \cdot \left(1 - v \cdot v\right) \]
    2. Add Preprocessing
    3. Taylor expanded in v around 0

      \[\leadsto \color{blue}{\frac{1}{4} \cdot \sqrt{2}} \]
    4. Step-by-step derivation
      1. *-commutative: N/A

        \[\leadsto \color{blue}{\sqrt{2} \cdot \frac{1}{4}} \]
      2. lower-*.f64: N/A

        \[\leadsto \color{blue}{\sqrt{2} \cdot \frac{1}{4}} \]
      3. lower-sqrt.f64: 99.0

        \[\leadsto \color{blue}{\sqrt{2}} \cdot 0.25 \]
    5. Applied rewrites: 99.0%

      \[\leadsto \color{blue}{\sqrt{2} \cdot 0.25} \]
    6. Final simplification: 99.0%

      \[\leadsto 0.25 \cdot \sqrt{2} \]
    7. Add Preprocessing

    Reproduce

    ?
    herbie shell --seed 2024331 
    (FPCore (v)
      :name "Falkner and Boettcher, Appendix B, 2"
      :precision binary64
      (* (* (/ (sqrt 2.0) 4.0) (sqrt (- 1.0 (* 3.0 (* v v))))) (- 1.0 (* v v))))