UniformSampleCone 2

Percentage Accurate: 99.0% → 99.0%
Time: 20.8s
Alternatives: 14
Speedup: 1.0×

Specification

?
\[\left(\left(\left(\left(\left(-10000 \leq xi \land xi \leq 10000\right) \land \left(-10000 \leq yi \land yi \leq 10000\right)\right) \land \left(-10000 \leq zi \land zi \leq 10000\right)\right) \land \left(2.328306437 \cdot 10^{-10} \leq ux \land ux \leq 1\right)\right) \land \left(2.328306437 \cdot 10^{-10} \leq uy \land uy \leq 1\right)\right) \land \left(0 \leq maxCos \land maxCos \leq 1\right)\]
\[\begin{array}{l} \\ \begin{array}{l} t_0 := \left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\\ t_1 := \sqrt{1 - t\_0 \cdot t\_0}\\ t_2 := \left(uy \cdot 2\right) \cdot \pi\\ \left(\left(\cos t\_2 \cdot t\_1\right) \cdot xi + \left(\sin t\_2 \cdot t\_1\right) \cdot yi\right) + t\_0 \cdot zi \end{array} \end{array} \]
; Specification expression, evaluated entirely in binary32.
;   t_0 = ((1 - ux) * maxCos) * ux
;   t_1 = sqrt(1 - t_0 * t_0)
;   t_2 = (uy * 2) * PI
; Result: ((cos t_2 * t_1) * xi + (sin t_2 * t_1) * yi) + t_0 * zi
(FPCore (xi yi zi ux uy maxCos)
 :precision binary32
 (let* ((t_0 (* (* (- 1.0 ux) maxCos) ux))
        (t_1 (sqrt (- 1.0 (* t_0 t_0))))
        (t_2 (* (* uy 2.0) PI)))
   (+ (+ (* (* (cos t_2) t_1) xi) (* (* (sin t_2) t_1) yi)) (* t_0 zi))))
/* C rendering of the specification expression; every operation is single precision.
 * Returns ((cos(t_2) * t_1) * xi + (sin(t_2) * t_1) * yi) + t_0 * zi. */
float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
	/* Coefficient of zi; also squared inside t_1. */
	float t_0 = ((1.0f - ux) * maxCos) * ux;
	/* Common factor applied to both the xi and yi terms. */
	float t_1 = sqrtf((1.0f - (t_0 * t_0)));
	/* Angle passed to cosf/sinf; M_PI is narrowed to float before the multiply. */
	float t_2 = (uy * 2.0f) * ((float) M_PI);
	return (((cosf(t_2) * t_1) * xi) + ((sinf(t_2) * t_1) * yi)) + (t_0 * zi);
}
# Julia rendering of the specification expression.
# Each intermediate product, sum and difference is wrapped in Float32(...).
# Returns ((cos(t_2) * t_1) * xi + (sin(t_2) * t_1) * yi) + t_0 * zi.
function code(xi, yi, zi, ux, uy, maxCos)
	# Coefficient of zi; also squared inside t_1.
	t_0 = Float32(Float32(Float32(Float32(1.0) - ux) * maxCos) * ux)
	# Common factor applied to both the xi and yi terms.
	t_1 = sqrt(Float32(Float32(1.0) - Float32(t_0 * t_0)))
	# Angle passed to cos/sin.
	t_2 = Float32(Float32(uy * Float32(2.0)) * Float32(pi))
	return Float32(Float32(Float32(Float32(cos(t_2) * t_1) * xi) + Float32(Float32(sin(t_2) * t_1) * yi)) + Float32(t_0 * zi))
end
% MATLAB rendering of the specification expression; literals are cast with single(...).
% Returns tmp = ((cos(t_2) * t_1) * xi + (sin(t_2) * t_1) * yi) + t_0 * zi.
function tmp = code(xi, yi, zi, ux, uy, maxCos)
	% Coefficient of zi; also squared inside t_1.
	t_0 = ((single(1.0) - ux) * maxCos) * ux;
	% Common factor applied to both the xi and yi terms.
	t_1 = sqrt((single(1.0) - (t_0 * t_0)));
	% Angle passed to cos/sin.
	t_2 = (uy * single(2.0)) * single(pi);
	tmp = (((cos(t_2) * t_1) * xi) + ((sin(t_2) * t_1) * yi)) + (t_0 * zi);
end
\begin{array}{l}

\\
\begin{array}{l}
t_0 := \left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\\
t_1 := \sqrt{1 - t\_0 \cdot t\_0}\\
t_2 := \left(uy \cdot 2\right) \cdot \pi\\
\left(\left(\cos t\_2 \cdot t\_1\right) \cdot xi + \left(\sin t\_2 \cdot t\_1\right) \cdot yi\right) + t\_0 \cdot zi
\end{array}
\end{array}

Sampling outcomes in binary32 precision:

Local Percentage Accuracy vs ?

The average percentage accuracy by input value. The horizontal axis shows the value of an input variable; the variable is chosen in the title. The vertical axis is accuracy; higher is better. Red represents the original program, while blue represents Herbie's suggestion. These can be toggled with the buttons below the plot. The line is an average, while dots represent individual samples.

Accuracy vs Speed?

Herbie found 14 alternatives:

Alternative | Accuracy | Speedup
The accuracy (vertical axis) and speed (horizontal axis) of each alternative. Up and to the right is better. The red square shows the initial program, and each blue circle shows an alternative. The line shows the best available speed-accuracy tradeoffs.

Initial Program: 99.0% accurate, 1.0× speedup?

\[\begin{array}{l} \\ \begin{array}{l} t_0 := \left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\\ t_1 := \sqrt{1 - t\_0 \cdot t\_0}\\ t_2 := \left(uy \cdot 2\right) \cdot \pi\\ \left(\left(\cos t\_2 \cdot t\_1\right) \cdot xi + \left(\sin t\_2 \cdot t\_1\right) \cdot yi\right) + t\_0 \cdot zi \end{array} \end{array} \]
; Initial program (the baseline the alternatives are compared against), in binary32.
;   t_0 = ((1 - ux) * maxCos) * ux
;   t_1 = sqrt(1 - t_0 * t_0)
;   t_2 = (uy * 2) * PI
; Result: ((cos t_2 * t_1) * xi + (sin t_2 * t_1) * yi) + t_0 * zi
(FPCore (xi yi zi ux uy maxCos)
 :precision binary32
 (let* ((t_0 (* (* (- 1.0 ux) maxCos) ux))
        (t_1 (sqrt (- 1.0 (* t_0 t_0))))
        (t_2 (* (* uy 2.0) PI)))
   (+ (+ (* (* (cos t_2) t_1) xi) (* (* (sin t_2) t_1) yi)) (* t_0 zi))))
/* Initial program in C; every operation is single precision.
 * Returns ((cos(t_2) * t_1) * xi + (sin(t_2) * t_1) * yi) + t_0 * zi. */
float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
	/* Coefficient of zi; also squared inside t_1. */
	float t_0 = ((1.0f - ux) * maxCos) * ux;
	/* Common factor applied to both the xi and yi terms. */
	float t_1 = sqrtf((1.0f - (t_0 * t_0)));
	/* Angle passed to cosf/sinf; M_PI is narrowed to float before the multiply. */
	float t_2 = (uy * 2.0f) * ((float) M_PI);
	return (((cosf(t_2) * t_1) * xi) + ((sinf(t_2) * t_1) * yi)) + (t_0 * zi);
}
# Initial program in Julia.
# Each intermediate product, sum and difference is wrapped in Float32(...).
# Returns ((cos(t_2) * t_1) * xi + (sin(t_2) * t_1) * yi) + t_0 * zi.
function code(xi, yi, zi, ux, uy, maxCos)
	# Coefficient of zi; also squared inside t_1.
	t_0 = Float32(Float32(Float32(Float32(1.0) - ux) * maxCos) * ux)
	# Common factor applied to both the xi and yi terms.
	t_1 = sqrt(Float32(Float32(1.0) - Float32(t_0 * t_0)))
	# Angle passed to cos/sin.
	t_2 = Float32(Float32(uy * Float32(2.0)) * Float32(pi))
	return Float32(Float32(Float32(Float32(cos(t_2) * t_1) * xi) + Float32(Float32(sin(t_2) * t_1) * yi)) + Float32(t_0 * zi))
end
% Initial program in MATLAB; literals are cast with single(...).
% Returns tmp = ((cos(t_2) * t_1) * xi + (sin(t_2) * t_1) * yi) + t_0 * zi.
function tmp = code(xi, yi, zi, ux, uy, maxCos)
	% Coefficient of zi; also squared inside t_1.
	t_0 = ((single(1.0) - ux) * maxCos) * ux;
	% Common factor applied to both the xi and yi terms.
	t_1 = sqrt((single(1.0) - (t_0 * t_0)));
	% Angle passed to cos/sin.
	t_2 = (uy * single(2.0)) * single(pi);
	tmp = (((cos(t_2) * t_1) * xi) + ((sin(t_2) * t_1) * yi)) + (t_0 * zi);
end
\begin{array}{l}

\\
\begin{array}{l}
t_0 := \left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\\
t_1 := \sqrt{1 - t\_0 \cdot t\_0}\\
t_2 := \left(uy \cdot 2\right) \cdot \pi\\
\left(\left(\cos t\_2 \cdot t\_1\right) \cdot xi + \left(\sin t\_2 \cdot t\_1\right) \cdot yi\right) + t\_0 \cdot zi
\end{array}
\end{array}

Alternative 1: 99.0% accurate, 1.0× speedup?

\[\begin{array}{l} \\ \begin{array}{l} t_0 := 2 \cdot \left(uy \cdot \pi\right)\\ t_1 := \sqrt{\mathsf{fma}\left(ux \cdot ux, \left(\left(1 - ux\right) \cdot maxCos\right) \cdot \left(maxCos \cdot \left(ux + -1\right)\right), 1\right)}\\ \mathsf{fma}\left(1 - ux, \left(ux \cdot maxCos\right) \cdot zi, \mathsf{fma}\left(\cos t\_0, t\_1 \cdot xi, t\_1 \cdot \left(\sin t\_0 \cdot yi\right)\right)\right) \end{array} \end{array} \]
; Alternative 1: the initial program restructured around fused multiply-adds.
;   t_0 = 2 * (uy * PI)                      -- angle for cos/sin
;   t_1 = sqrt(fma(ux*ux, A * B, 1))         -- A = (1 - ux) * maxCos, B = maxCos * (ux + -1)
; B is the negation of A in exact arithmetic, so the fma argument corresponds
; to 1 - (ux * A)^2, i.e. the radicand of the initial program.
; Result: fma(1 - ux, (ux * maxCos) * zi, fma(cos t_0, t_1 * xi, t_1 * (sin t_0 * yi)))
(FPCore (xi yi zi ux uy maxCos)
 :precision binary32
 (let* ((t_0 (* 2.0 (* uy PI)))
        (t_1
         (sqrt
          (fma
           (* ux ux)
           (* (* (- 1.0 ux) maxCos) (* maxCos (+ ux -1.0)))
           1.0))))
   (fma
    (- 1.0 ux)
    (* (* ux maxCos) zi)
    (fma (cos t_0) (* t_1 xi) (* t_1 (* (sin t_0) yi))))))
/* Alternative 1 in C: same quantities as the initial program, combined with fmaf.
 * Returns fmaf(1 - ux, (ux * maxCos) * zi, fmaf(cos(t_0), t_1 * xi, t_1 * (sin(t_0) * yi))). */
float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
	/* Angle passed to cosf/sinf. */
	float t_0 = 2.0f * (uy * ((float) M_PI));
	/* sqrt(1 + ux^2 * A * B) with A = (1 - ux) * maxCos and B = maxCos * (ux - 1);
	 * B equals -A in exact arithmetic, so the radicand corresponds to 1 - (ux * A)^2. */
	float t_1 = sqrtf(fmaf((ux * ux), (((1.0f - ux) * maxCos) * (maxCos * (ux + -1.0f))), 1.0f));
	return fmaf((1.0f - ux), ((ux * maxCos) * zi), fmaf(cosf(t_0), (t_1 * xi), (t_1 * (sinf(t_0) * yi))));
}
# Alternative 1 in Julia: same quantities as the initial program, combined with fma.
# Returns fma(1 - ux, (ux * maxCos) * zi, fma(cos(t_0), t_1 * xi, t_1 * (sin(t_0) * yi))).
function code(xi, yi, zi, ux, uy, maxCos)
	# Angle passed to cos/sin.
	t_0 = Float32(Float32(2.0) * Float32(uy * Float32(pi)))
	# sqrt(1 + ux^2 * A * B) with A = (1 - ux) * maxCos and B = maxCos * (ux - 1);
	# B equals -A in exact arithmetic, so the radicand corresponds to 1 - (ux * A)^2.
	t_1 = sqrt(fma(Float32(ux * ux), Float32(Float32(Float32(Float32(1.0) - ux) * maxCos) * Float32(maxCos * Float32(ux + Float32(-1.0)))), Float32(1.0)))
	return fma(Float32(Float32(1.0) - ux), Float32(Float32(ux * maxCos) * zi), fma(cos(t_0), Float32(t_1 * xi), Float32(t_1 * Float32(sin(t_0) * yi))))
end
\begin{array}{l}

\\
\begin{array}{l}
t_0 := 2 \cdot \left(uy \cdot \pi\right)\\
t_1 := \sqrt{\mathsf{fma}\left(ux \cdot ux, \left(\left(1 - ux\right) \cdot maxCos\right) \cdot \left(maxCos \cdot \left(ux + -1\right)\right), 1\right)}\\
\mathsf{fma}\left(1 - ux, \left(ux \cdot maxCos\right) \cdot zi, \mathsf{fma}\left(\cos t\_0, t\_1 \cdot xi, t\_1 \cdot \left(\sin t\_0 \cdot yi\right)\right)\right)
\end{array}
\end{array}
Derivation
  1. Initial program 99.0%

    \[\left(\left(\cos \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot xi + \left(\sin \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot yi\right) + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
  2. Add Preprocessing
  3. Applied rewrites: 99.1%

    \[\leadsto \color{blue}{\mathsf{fma}\left(1 - ux, \left(ux \cdot maxCos\right) \cdot zi, \mathsf{fma}\left(\cos \left(2 \cdot \left(uy \cdot \pi\right)\right), \sqrt{\mathsf{fma}\left(ux \cdot ux, \left(\left(1 - ux\right) \cdot maxCos\right) \cdot \left(-\left(1 - ux\right) \cdot maxCos\right), 1\right)} \cdot xi, \sqrt{\mathsf{fma}\left(ux \cdot ux, \left(\left(1 - ux\right) \cdot maxCos\right) \cdot \left(-\left(1 - ux\right) \cdot maxCos\right), 1\right)} \cdot \left(\sin \left(2 \cdot \left(uy \cdot \pi\right)\right) \cdot yi\right)\right)\right)} \]
  4. Final simplification: 99.1%

    \[\leadsto \mathsf{fma}\left(1 - ux, \left(ux \cdot maxCos\right) \cdot zi, \mathsf{fma}\left(\cos \left(2 \cdot \left(uy \cdot \pi\right)\right), \sqrt{\mathsf{fma}\left(ux \cdot ux, \left(\left(1 - ux\right) \cdot maxCos\right) \cdot \left(maxCos \cdot \left(ux + -1\right)\right), 1\right)} \cdot xi, \sqrt{\mathsf{fma}\left(ux \cdot ux, \left(\left(1 - ux\right) \cdot maxCos\right) \cdot \left(maxCos \cdot \left(ux + -1\right)\right), 1\right)} \cdot \left(\sin \left(2 \cdot \left(uy \cdot \pi\right)\right) \cdot yi\right)\right)\right) \]
  5. Add Preprocessing

Alternative 2: 98.8% accurate, 1.4× speedup?

\[\begin{array}{l} \\ \begin{array}{l} t_0 := \pi \cdot \left(2 \cdot uy\right)\\ \mathsf{fma}\left(maxCos, \left(1 - ux\right) \cdot \left(ux \cdot zi\right), \mathsf{fma}\left(xi, \cos t\_0, yi \cdot \sin t\_0\right)\right) \end{array} \end{array} \]
; Alternative 2: obtained by Taylor expansion in maxCos around 0 (see its derivation).
; The sqrt factor of the initial program does not appear here.
;   t_0 = PI * (2 * uy)
; Result: fma(maxCos, (1 - ux) * (ux * zi), fma(xi, cos t_0, yi * sin t_0))
(FPCore (xi yi zi ux uy maxCos)
 :precision binary32
 (let* ((t_0 (* PI (* 2.0 uy))))
   (fma maxCos (* (- 1.0 ux) (* ux zi)) (fma xi (cos t_0) (* yi (sin t_0))))))
/* Alternative 2 in C: no sqrt factor; xi and yi are scaled by cos/sin only.
 * Returns fmaf(maxCos, (1 - ux) * (ux * zi), fmaf(xi, cos(t_0), yi * sin(t_0))). */
float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
	/* Angle passed to cosf/sinf. */
	float t_0 = ((float) M_PI) * (2.0f * uy);
	return fmaf(maxCos, ((1.0f - ux) * (ux * zi)), fmaf(xi, cosf(t_0), (yi * sinf(t_0))));
}
# Alternative 2 in Julia: no sqrt factor; xi and yi are scaled by cos/sin only.
# Returns fma(maxCos, (1 - ux) * (ux * zi), fma(xi, cos(t_0), yi * sin(t_0))).
function code(xi, yi, zi, ux, uy, maxCos)
	# Angle passed to cos/sin.
	t_0 = Float32(Float32(pi) * Float32(Float32(2.0) * uy))
	return fma(maxCos, Float32(Float32(Float32(1.0) - ux) * Float32(ux * zi)), fma(xi, cos(t_0), Float32(yi * sin(t_0))))
end
\begin{array}{l}

\\
\begin{array}{l}
t_0 := \pi \cdot \left(2 \cdot uy\right)\\
\mathsf{fma}\left(maxCos, \left(1 - ux\right) \cdot \left(ux \cdot zi\right), \mathsf{fma}\left(xi, \cos t\_0, yi \cdot \sin t\_0\right)\right)
\end{array}
\end{array}
Derivation
  1. Initial program 99.0%

    \[\left(\left(\cos \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot xi + \left(\sin \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot yi\right) + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
  2. Add Preprocessing
  3. Taylor expanded in maxCos around 0

    \[\leadsto \color{blue}{maxCos \cdot \left(ux \cdot \left(zi \cdot \left(1 - ux\right)\right)\right) + \left(xi \cdot \cos \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right) + yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right)} \]
  4. Step-by-step derivation
    1. lower-fma.f32: N/A

      \[\leadsto \color{blue}{\mathsf{fma}\left(maxCos, ux \cdot \left(zi \cdot \left(1 - ux\right)\right), xi \cdot \cos \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right) + yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right)} \]
    2. associate-*r*: N/A

      \[\leadsto \mathsf{fma}\left(maxCos, \color{blue}{\left(ux \cdot zi\right) \cdot \left(1 - ux\right)}, xi \cdot \cos \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right) + yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
    3. lower-*.f32: N/A

      \[\leadsto \mathsf{fma}\left(maxCos, \color{blue}{\left(ux \cdot zi\right) \cdot \left(1 - ux\right)}, xi \cdot \cos \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right) + yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
    4. lower-*.f32: N/A

      \[\leadsto \mathsf{fma}\left(maxCos, \color{blue}{\left(ux \cdot zi\right)} \cdot \left(1 - ux\right), xi \cdot \cos \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right) + yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
    5. lower--.f32: N/A

      \[\leadsto \mathsf{fma}\left(maxCos, \left(ux \cdot zi\right) \cdot \color{blue}{\left(1 - ux\right)}, xi \cdot \cos \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right) + yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
    6. lower-fma.f32: N/A

      \[\leadsto \mathsf{fma}\left(maxCos, \left(ux \cdot zi\right) \cdot \left(1 - ux\right), \color{blue}{\mathsf{fma}\left(xi, \cos \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right), yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right)}\right) \]
    7. lower-cos.f32: N/A

      \[\leadsto \mathsf{fma}\left(maxCos, \left(ux \cdot zi\right) \cdot \left(1 - ux\right), \mathsf{fma}\left(xi, \color{blue}{\cos \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)}, yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right)\right) \]
    8. associate-*r*: N/A

      \[\leadsto \mathsf{fma}\left(maxCos, \left(ux \cdot zi\right) \cdot \left(1 - ux\right), \mathsf{fma}\left(xi, \cos \color{blue}{\left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right)}, yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right)\right) \]
    9. lower-*.f32: N/A

      \[\leadsto \mathsf{fma}\left(maxCos, \left(ux \cdot zi\right) \cdot \left(1 - ux\right), \mathsf{fma}\left(xi, \cos \color{blue}{\left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right)}, yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right)\right) \]
    10. lower-*.f32: N/A

      \[\leadsto \mathsf{fma}\left(maxCos, \left(ux \cdot zi\right) \cdot \left(1 - ux\right), \mathsf{fma}\left(xi, \cos \left(\color{blue}{\left(2 \cdot uy\right)} \cdot \mathsf{PI}\left(\right)\right), yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right)\right) \]
    11. lower-PI.f32: N/A

      \[\leadsto \mathsf{fma}\left(maxCos, \left(ux \cdot zi\right) \cdot \left(1 - ux\right), \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \color{blue}{\mathsf{PI}\left(\right)}\right), yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right)\right) \]
    12. lower-*.f32: N/A

      \[\leadsto \mathsf{fma}\left(maxCos, \left(ux \cdot zi\right) \cdot \left(1 - ux\right), \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), \color{blue}{yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)}\right)\right) \]
    13. lower-sin.f32: N/A

      \[\leadsto \mathsf{fma}\left(maxCos, \left(ux \cdot zi\right) \cdot \left(1 - ux\right), \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), yi \cdot \color{blue}{\sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)}\right)\right) \]
    14. associate-*r*: N/A

      \[\leadsto \mathsf{fma}\left(maxCos, \left(ux \cdot zi\right) \cdot \left(1 - ux\right), \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), yi \cdot \sin \color{blue}{\left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right)}\right)\right) \]
    15. lower-*.f32: N/A

      \[\leadsto \mathsf{fma}\left(maxCos, \left(ux \cdot zi\right) \cdot \left(1 - ux\right), \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), yi \cdot \sin \color{blue}{\left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right)}\right)\right) \]
    16. lower-*.f32: N/A

      \[\leadsto \mathsf{fma}\left(maxCos, \left(ux \cdot zi\right) \cdot \left(1 - ux\right), \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), yi \cdot \sin \left(\color{blue}{\left(2 \cdot uy\right)} \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
    17. lower-PI.f32: 98.9%

      \[\leadsto \mathsf{fma}\left(maxCos, \left(ux \cdot zi\right) \cdot \left(1 - ux\right), \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \pi\right), yi \cdot \sin \left(\left(2 \cdot uy\right) \cdot \color{blue}{\pi}\right)\right)\right) \]
  5. Applied rewrites: 98.9%

    \[\leadsto \color{blue}{\mathsf{fma}\left(maxCos, \left(ux \cdot zi\right) \cdot \left(1 - ux\right), \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \pi\right), yi \cdot \sin \left(\left(2 \cdot uy\right) \cdot \pi\right)\right)\right)} \]
  6. Final simplification: 98.9%

    \[\leadsto \mathsf{fma}\left(maxCos, \left(1 - ux\right) \cdot \left(ux \cdot zi\right), \mathsf{fma}\left(xi, \cos \left(\pi \cdot \left(2 \cdot uy\right)\right), yi \cdot \sin \left(\pi \cdot \left(2 \cdot uy\right)\right)\right)\right) \]
  7. Add Preprocessing

Alternative 3: 95.8% accurate, 1.5× speedup?

\[\begin{array}{l} \\ \begin{array}{l} t_0 := \pi \cdot \left(2 \cdot uy\right)\\ \mathsf{fma}\left(xi, \cos t\_0, \mathsf{fma}\left(yi, \sin t\_0, maxCos \cdot \left(ux \cdot zi\right)\right)\right) \end{array} \end{array} \]
; Alternative 3: obtained by Taylor expansion in ux around 0 (see its derivation).
; Neither the sqrt factor nor the (1 - ux) factor of the initial program appears here.
;   t_0 = PI * (2 * uy)
; Result: fma(xi, cos t_0, fma(yi, sin t_0, maxCos * (ux * zi)))
(FPCore (xi yi zi ux uy maxCos)
 :precision binary32
 (let* ((t_0 (* PI (* 2.0 uy))))
   (fma xi (cos t_0) (fma yi (sin t_0) (* maxCos (* ux zi))))))
/* Alternative 3 in C: no sqrt factor and no (1 - ux) factor.
 * Returns fmaf(xi, cos(t_0), fmaf(yi, sin(t_0), maxCos * (ux * zi))). */
float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
	/* Angle passed to cosf/sinf. */
	float t_0 = ((float) M_PI) * (2.0f * uy);
	return fmaf(xi, cosf(t_0), fmaf(yi, sinf(t_0), (maxCos * (ux * zi))));
}
# Alternative 3 in Julia: no sqrt factor and no (1 - ux) factor.
# Returns fma(xi, cos(t_0), fma(yi, sin(t_0), maxCos * (ux * zi))).
function code(xi, yi, zi, ux, uy, maxCos)
	# Angle passed to cos/sin.
	t_0 = Float32(Float32(pi) * Float32(Float32(2.0) * uy))
	return fma(xi, cos(t_0), fma(yi, sin(t_0), Float32(maxCos * Float32(ux * zi))))
end
\begin{array}{l}

\\
\begin{array}{l}
t_0 := \pi \cdot \left(2 \cdot uy\right)\\
\mathsf{fma}\left(xi, \cos t\_0, \mathsf{fma}\left(yi, \sin t\_0, maxCos \cdot \left(ux \cdot zi\right)\right)\right)
\end{array}
\end{array}
Derivation
  1. Initial program 99.0%

    \[\left(\left(\cos \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot xi + \left(\sin \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot yi\right) + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
  2. Add Preprocessing
  3. Taylor expanded in ux around 0

    \[\leadsto \color{blue}{maxCos \cdot \left(ux \cdot zi\right) + \left(xi \cdot \cos \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right) + yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right)} \]
  4. Step-by-step derivation
    1. +-commutative: N/A

      \[\leadsto \color{blue}{\left(xi \cdot \cos \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right) + yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) + maxCos \cdot \left(ux \cdot zi\right)} \]
    2. associate-+l+: N/A

      \[\leadsto \color{blue}{xi \cdot \cos \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right) + \left(yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right) + maxCos \cdot \left(ux \cdot zi\right)\right)} \]
    3. lower-fma.f32: N/A

      \[\leadsto \color{blue}{\mathsf{fma}\left(xi, \cos \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right), yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right) + maxCos \cdot \left(ux \cdot zi\right)\right)} \]
    4. lower-cos.f32: N/A

      \[\leadsto \mathsf{fma}\left(xi, \color{blue}{\cos \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)}, yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right) + maxCos \cdot \left(ux \cdot zi\right)\right) \]
    5. associate-*r*: N/A

      \[\leadsto \mathsf{fma}\left(xi, \cos \color{blue}{\left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right)}, yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right) + maxCos \cdot \left(ux \cdot zi\right)\right) \]
    6. lower-*.f32: N/A

      \[\leadsto \mathsf{fma}\left(xi, \cos \color{blue}{\left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right)}, yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right) + maxCos \cdot \left(ux \cdot zi\right)\right) \]
    7. lower-*.f32: N/A

      \[\leadsto \mathsf{fma}\left(xi, \cos \left(\color{blue}{\left(2 \cdot uy\right)} \cdot \mathsf{PI}\left(\right)\right), yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right) + maxCos \cdot \left(ux \cdot zi\right)\right) \]
    8. lower-PI.f32: N/A

      \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \color{blue}{\mathsf{PI}\left(\right)}\right), yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right) + maxCos \cdot \left(ux \cdot zi\right)\right) \]
    9. lower-fma.f32: N/A

      \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), \color{blue}{\mathsf{fma}\left(yi, \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right), maxCos \cdot \left(ux \cdot zi\right)\right)}\right) \]
    10. lower-sin.f32: N/A

      \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), \mathsf{fma}\left(yi, \color{blue}{\sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)}, maxCos \cdot \left(ux \cdot zi\right)\right)\right) \]
    11. associate-*r*: N/A

      \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), \mathsf{fma}\left(yi, \sin \color{blue}{\left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right)}, maxCos \cdot \left(ux \cdot zi\right)\right)\right) \]
    12. lower-*.f32: N/A

      \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), \mathsf{fma}\left(yi, \sin \color{blue}{\left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right)}, maxCos \cdot \left(ux \cdot zi\right)\right)\right) \]
    13. lower-*.f32: N/A

      \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), \mathsf{fma}\left(yi, \sin \left(\color{blue}{\left(2 \cdot uy\right)} \cdot \mathsf{PI}\left(\right)\right), maxCos \cdot \left(ux \cdot zi\right)\right)\right) \]
    14. lower-PI.f32: N/A

      \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), \mathsf{fma}\left(yi, \sin \left(\left(2 \cdot uy\right) \cdot \color{blue}{\mathsf{PI}\left(\right)}\right), maxCos \cdot \left(ux \cdot zi\right)\right)\right) \]
    15. lower-*.f32: N/A

      \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), \mathsf{fma}\left(yi, \sin \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), \color{blue}{maxCos \cdot \left(ux \cdot zi\right)}\right)\right) \]
    16. lower-*.f32: 95.6%

      \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \pi\right), \mathsf{fma}\left(yi, \sin \left(\left(2 \cdot uy\right) \cdot \pi\right), maxCos \cdot \color{blue}{\left(ux \cdot zi\right)}\right)\right) \]
  5. Applied rewrites: 95.6%

    \[\leadsto \color{blue}{\mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \pi\right), \mathsf{fma}\left(yi, \sin \left(\left(2 \cdot uy\right) \cdot \pi\right), maxCos \cdot \left(ux \cdot zi\right)\right)\right)} \]
  6. Final simplification: 95.6%

    \[\leadsto \mathsf{fma}\left(xi, \cos \left(\pi \cdot \left(2 \cdot uy\right)\right), \mathsf{fma}\left(yi, \sin \left(\pi \cdot \left(2 \cdot uy\right)\right), maxCos \cdot \left(ux \cdot zi\right)\right)\right) \]
  7. Add Preprocessing

Alternative 4: 97.4% accurate, 1.5× speedup?

\[\begin{array}{l} \\ \begin{array}{l} t_0 := uy \cdot \left(\pi + \pi\right)\\ t_1 := \sqrt{\mathsf{fma}\left(maxCos, \left(\left(1 - ux\right) \cdot \left(ux + -1\right)\right) \cdot \left(maxCos \cdot \left(ux \cdot ux\right)\right), 1\right)}\\ \mathbf{if}\;2 \cdot uy \leq 0.031950000673532486:\\ \;\;\;\;\mathsf{fma}\left(\left(1 - ux\right) \cdot zi, ux \cdot maxCos, \mathsf{fma}\left(uy, \mathsf{fma}\left(2, t\_1 \cdot \left(\pi \cdot yi\right), t\_1 \cdot \left(uy \cdot \mathsf{fma}\left(xi, \left(\pi \cdot \pi\right) \cdot -2, \left(uy \cdot yi\right) \cdot \left(\left(\pi \cdot \left(\pi \cdot \pi\right)\right) \cdot -1.3333333333333333\right)\right)\right)\right), xi \cdot t\_1\right)\right)\\ \mathbf{else}:\\ \;\;\;\;\mathsf{fma}\left(\cos t\_0, xi, yi \cdot \sin t\_0\right)\\ \end{array} \end{array} \]
; Alternative 4: two regimes selected by comparing 2 * uy against 0.031950000673532486.
;   t_0 = uy * (PI + PI)                      -- angle used only in the else branch
;   t_1 = sqrt(fma(maxCos, ((1 - ux) * (ux + -1)) * (maxCos * (ux * ux)), 1))
; Small-uy branch: no cos/sin calls. The nested fma in uy expands algebraically to
;   t_1 * (xi * (1 - 2*PI^2*uy^2) + yi * (2*PI*uy - (4/3)*PI^3*uy^3)),
; and ((1 - ux) * zi) * (ux * maxCos) is added on top of it.
; Else branch: fma(cos t_0, xi, yi * sin t_0) -- it has no t_1 factor and no zi term.
(FPCore (xi yi zi ux uy maxCos)
 :precision binary32
 (let* ((t_0 (* uy (+ PI PI)))
        (t_1
         (sqrt
          (fma
           maxCos
           (* (* (- 1.0 ux) (+ ux -1.0)) (* maxCos (* ux ux)))
           1.0))))
   (if (<= (* 2.0 uy) 0.031950000673532486)
     (fma
      (* (- 1.0 ux) zi)
      (* ux maxCos)
      (fma
       uy
       (fma
        2.0
        (* t_1 (* PI yi))
        (*
         t_1
         (*
          uy
          (fma
           xi
           (* (* PI PI) -2.0)
           (* (* uy yi) (* (* PI (* PI PI)) -1.3333333333333333))))))
       (* xi t_1)))
     (fma (cos t_0) xi (* yi (sin t_0))))))
/* Alternative 4 in C: two regimes selected by comparing 2 * uy against a fixed threshold.
 * The small-uy branch uses a polynomial in uy instead of calling cosf/sinf;
 * the else branch calls cosf/sinf and has no t_1 factor and no zi term. */
float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
	/* Angle used only by the else branch. */
	float t_0 = uy * (((float) M_PI) + ((float) M_PI));
	/* Square-root factor used only by the small-uy branch. */
	float t_1 = sqrtf(fmaf(maxCos, (((1.0f - ux) * (ux + -1.0f)) * (maxCos * (ux * ux))), 1.0f));
	float tmp;
	if ((2.0f * uy) <= 0.031950000673532486f) {
		/* Expands algebraically to
		 * (1 - ux) * zi * ux * maxCos
		 *   + t_1 * (xi * (1 - 2*pi^2*uy^2) + yi * (2*pi*uy - (4/3)*pi^3*uy^3)). */
		tmp = fmaf(((1.0f - ux) * zi), (ux * maxCos), fmaf(uy, fmaf(2.0f, (t_1 * (((float) M_PI) * yi)), (t_1 * (uy * fmaf(xi, ((((float) M_PI) * ((float) M_PI)) * -2.0f), ((uy * yi) * ((((float) M_PI) * (((float) M_PI) * ((float) M_PI))) * -1.3333333333333333f)))))), (xi * t_1)));
	} else {
		tmp = fmaf(cosf(t_0), xi, (yi * sinf(t_0)));
	}
	return tmp;
}
# Two-regime Float32 evaluation of the cone-sampling expression.
#
# While 2*uy <= 0.03195..., cos/sin of the angle are replaced by a cubic
# polynomial in uy, scaled by t_1 = sqrt(1 - (maxCos*ux*(1-ux))^2), and the
# zi contribution is added. Otherwise the result is xi*cos(t_0) + yi*sin(t_0)
# with t_0 = uy*(pi+pi); that branch uses neither t_1 nor zi.
# Every intermediate is rounded through Float32(...), as in the formula.
function code(xi, yi, zi, ux, uy, maxCos)
	p = Float32(pi)
	unit = Float32(1.0)
	gap = Float32(unit - ux)

	t_0 = Float32(uy * Float32(p + p))
	# (1-ux)*(ux-1) supplies the negative sign; fma adds the leading 1.
	shrink = Float32(Float32(gap * Float32(ux + Float32(-1.0))) * Float32(maxCos * Float32(ux * ux)))
	t_1 = sqrt(fma(maxCos, shrink, unit))

	# Negated comparison so that a NaN uy still lands in the trig branch.
	if !(Float32(Float32(2.0) * uy) <= Float32(0.031950000673532486))
		return fma(cos(t_0), xi, Float32(yi * sin(t_0)))
	end

	# Polynomial coefficients: -2*pi^2 and -(4/3)*pi^3.
	quad_coef = Float32(Float32(p * p) * Float32(-2.0))
	cubic_coef = Float32(Float32(p * Float32(p * p)) * Float32(-1.3333333333333333))

	curve = Float32(uy * fma(xi, quad_coef, Float32(Float32(uy * yi) * cubic_coef)))
	slope = fma(Float32(2.0), Float32(t_1 * Float32(p * yi)), Float32(t_1 * curve))

	return fma(Float32(gap * zi), Float32(ux * maxCos), fma(uy, slope, Float32(xi * t_1)))
end
\begin{array}{l}

\\
\begin{array}{l}
t_0 := uy \cdot \left(\pi + \pi\right)\\
t_1 := \sqrt{\mathsf{fma}\left(maxCos, \left(\left(1 - ux\right) \cdot \left(ux + -1\right)\right) \cdot \left(maxCos \cdot \left(ux \cdot ux\right)\right), 1\right)}\\
\mathbf{if}\;2 \cdot uy \leq 0.031950000673532486:\\
\;\;\;\;\mathsf{fma}\left(\left(1 - ux\right) \cdot zi, ux \cdot maxCos, \mathsf{fma}\left(uy, \mathsf{fma}\left(2, t\_1 \cdot \left(\pi \cdot yi\right), t\_1 \cdot \left(uy \cdot \mathsf{fma}\left(xi, \left(\pi \cdot \pi\right) \cdot -2, \left(uy \cdot yi\right) \cdot \left(\left(\pi \cdot \left(\pi \cdot \pi\right)\right) \cdot -1.3333333333333333\right)\right)\right)\right), xi \cdot t\_1\right)\right)\\

\mathbf{else}:\\
\;\;\;\;\mathsf{fma}\left(\cos t\_0, xi, yi \cdot \sin t\_0\right)\\


\end{array}
\end{array}
Derivation
  1. Split input into 2 regimes
  2. if (*.f32 uy #s(literal 2 binary32)) < 0.0319500007

    1. Initial program 99.3%

      \[\left(\left(\cos \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot xi + \left(\sin \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot yi\right) + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
    2. Add Preprocessing
    3. Taylor expanded in uy around 0

      \[\leadsto \color{blue}{\left(uy \cdot \left(2 \cdot \left(\left(yi \cdot \mathsf{PI}\left(\right)\right) \cdot \sqrt{1 - {maxCos}^{2} \cdot \left({ux}^{2} \cdot {\left(1 - ux\right)}^{2}\right)}\right) + uy \cdot \left(-2 \cdot \left(\left(xi \cdot {\mathsf{PI}\left(\right)}^{2}\right) \cdot \sqrt{1 - {maxCos}^{2} \cdot \left({ux}^{2} \cdot {\left(1 - ux\right)}^{2}\right)}\right) + \frac{-4}{3} \cdot \left(\left(uy \cdot \left(yi \cdot {\mathsf{PI}\left(\right)}^{3}\right)\right) \cdot \sqrt{1 - {maxCos}^{2} \cdot \left({ux}^{2} \cdot {\left(1 - ux\right)}^{2}\right)}\right)\right)\right) + xi \cdot \sqrt{1 - {maxCos}^{2} \cdot \left({ux}^{2} \cdot {\left(1 - ux\right)}^{2}\right)}\right)} + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
    4. Applied rewrites: 99.2%

      \[\leadsto \color{blue}{\mathsf{fma}\left(xi, \sqrt{\mathsf{fma}\left(maxCos \cdot maxCos, \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right) \cdot \left(ux \cdot \left(-ux\right)\right), 1\right)}, uy \cdot \mathsf{fma}\left(uy, \sqrt{\mathsf{fma}\left(maxCos \cdot maxCos, \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right) \cdot \left(ux \cdot \left(-ux\right)\right), 1\right)} \cdot \mathsf{fma}\left(-1.3333333333333333, uy \cdot \left(yi \cdot \left(\pi \cdot \left(\pi \cdot \pi\right)\right)\right), -2 \cdot \left(xi \cdot \left(\pi \cdot \pi\right)\right)\right), \left(2 \cdot \left(yi \cdot \pi\right)\right) \cdot \sqrt{\mathsf{fma}\left(maxCos \cdot maxCos, \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right) \cdot \left(ux \cdot \left(-ux\right)\right), 1\right)}\right)\right)} + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
    5. Applied rewrites: 99.4%

      \[\leadsto \color{blue}{\mathsf{fma}\left(\left(1 - ux\right) \cdot zi, maxCos \cdot ux, \mathsf{fma}\left(uy, \mathsf{fma}\left(2, \sqrt{\mathsf{fma}\left(maxCos, \left(maxCos \cdot \left(ux \cdot \left(-ux\right)\right)\right) \cdot \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right), 1\right)} \cdot \left(\pi \cdot yi\right), \sqrt{\mathsf{fma}\left(maxCos, \left(maxCos \cdot \left(ux \cdot \left(-ux\right)\right)\right) \cdot \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right), 1\right)} \cdot \left(\mathsf{fma}\left(xi, \left(\pi \cdot \pi\right) \cdot -2, \left(uy \cdot yi\right) \cdot \left(\left(\pi \cdot \left(\pi \cdot \pi\right)\right) \cdot -1.3333333333333333\right)\right) \cdot uy\right)\right), xi \cdot \sqrt{\mathsf{fma}\left(maxCos, \left(maxCos \cdot \left(ux \cdot \left(-ux\right)\right)\right) \cdot \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right), 1\right)}\right)\right)} \]

    if 0.0319500007 < (*.f32 uy #s(literal 2 binary32))

    1. Initial program 97.5%

      \[\left(\left(\cos \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot xi + \left(\sin \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot yi\right) + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
    2. Add Preprocessing
    3. Taylor expanded in ux around 0

      \[\leadsto \color{blue}{xi \cdot \cos \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right) + yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)} \]
    4. Step-by-step derivation
      1. lower-fma.f32: N/A

        \[\leadsto \color{blue}{\mathsf{fma}\left(xi, \cos \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right), yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right)} \]
      2. lower-cos.f32: N/A

        \[\leadsto \mathsf{fma}\left(xi, \color{blue}{\cos \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)}, yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
      3. associate-*r*: N/A

        \[\leadsto \mathsf{fma}\left(xi, \cos \color{blue}{\left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right)}, yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
      4. lower-*.f32: N/A

        \[\leadsto \mathsf{fma}\left(xi, \cos \color{blue}{\left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right)}, yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
      5. lower-*.f32: N/A

        \[\leadsto \mathsf{fma}\left(xi, \cos \left(\color{blue}{\left(2 \cdot uy\right)} \cdot \mathsf{PI}\left(\right)\right), yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
      6. lower-PI.f32: N/A

        \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \color{blue}{\mathsf{PI}\left(\right)}\right), yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
      7. lower-*.f32: N/A

        \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), \color{blue}{yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)}\right) \]
      8. lower-sin.f32: N/A

        \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), yi \cdot \color{blue}{\sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)}\right) \]
      9. associate-*r*: N/A

        \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), yi \cdot \sin \color{blue}{\left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right)}\right) \]
      10. lower-*.f32: N/A

        \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), yi \cdot \sin \color{blue}{\left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right)}\right) \]
      11. lower-*.f32: N/A

        \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), yi \cdot \sin \left(\color{blue}{\left(2 \cdot uy\right)} \cdot \mathsf{PI}\left(\right)\right)\right) \]
      12. lower-PI.f32: 91.7%

        \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \pi\right), yi \cdot \sin \left(\left(2 \cdot uy\right) \cdot \color{blue}{\pi}\right)\right) \]
    5. Applied rewrites: 91.7%

      \[\leadsto \color{blue}{\mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \pi\right), yi \cdot \sin \left(\left(2 \cdot uy\right) \cdot \pi\right)\right)} \]
    6. Step-by-step derivation
      1. Applied rewrites: 91.7%

        \[\leadsto \mathsf{fma}\left(\cos \left(uy \cdot \left(\pi + \pi\right)\right), \color{blue}{xi}, yi \cdot \sin \left(uy \cdot \left(\pi + \pi\right)\right)\right) \]
    7. Recombined 2 regimes into one program.
    8. Final simplification: 97.9%

      \[\leadsto \begin{array}{l} \mathbf{if}\;2 \cdot uy \leq 0.031950000673532486:\\ \;\;\;\;\mathsf{fma}\left(\left(1 - ux\right) \cdot zi, ux \cdot maxCos, \mathsf{fma}\left(uy, \mathsf{fma}\left(2, \sqrt{\mathsf{fma}\left(maxCos, \left(\left(1 - ux\right) \cdot \left(ux + -1\right)\right) \cdot \left(maxCos \cdot \left(ux \cdot ux\right)\right), 1\right)} \cdot \left(\pi \cdot yi\right), \sqrt{\mathsf{fma}\left(maxCos, \left(\left(1 - ux\right) \cdot \left(ux + -1\right)\right) \cdot \left(maxCos \cdot \left(ux \cdot ux\right)\right), 1\right)} \cdot \left(uy \cdot \mathsf{fma}\left(xi, \left(\pi \cdot \pi\right) \cdot -2, \left(uy \cdot yi\right) \cdot \left(\left(\pi \cdot \left(\pi \cdot \pi\right)\right) \cdot -1.3333333333333333\right)\right)\right)\right), xi \cdot \sqrt{\mathsf{fma}\left(maxCos, \left(\left(1 - ux\right) \cdot \left(ux + -1\right)\right) \cdot \left(maxCos \cdot \left(ux \cdot ux\right)\right), 1\right)}\right)\right)\\ \mathbf{else}:\\ \;\;\;\;\mathsf{fma}\left(\cos \left(uy \cdot \left(\pi + \pi\right)\right), xi, yi \cdot \sin \left(uy \cdot \left(\pi + \pi\right)\right)\right)\\ \end{array} \]
    9. Add Preprocessing

    Alternative 5: 97.4% accurate, 1.5× speedup

    \[\begin{array}{l} \\ \begin{array}{l} t_0 := uy \cdot \left(\pi + \pi\right)\\ t_1 := \sqrt{\mathsf{fma}\left(maxCos \cdot maxCos, \left(ux \cdot ux\right) \cdot \left(\left(1 - ux\right) \cdot \left(ux + -1\right)\right), 1\right)}\\ \mathbf{if}\;2 \cdot uy \leq 0.031950000673532486:\\ \;\;\;\;\mathsf{fma}\left(maxCos, ux \cdot \left(\left(1 - ux\right) \cdot zi\right), \mathsf{fma}\left(uy, \mathsf{fma}\left(uy, t\_1 \cdot \mathsf{fma}\left(-2, xi \cdot \left(\pi \cdot \pi\right), -1.3333333333333333 \cdot \left(uy \cdot \left(yi \cdot \left(\pi \cdot \left(\pi \cdot \pi\right)\right)\right)\right)\right), t\_1 \cdot \left(2 \cdot \left(\pi \cdot yi\right)\right)\right), xi \cdot t\_1\right)\right)\\ \mathbf{else}:\\ \;\;\;\;\mathsf{fma}\left(\cos t\_0, xi, yi \cdot \sin t\_0\right)\\ \end{array} \end{array} \]
    ; Alternative 5: two-regime binary32 program, split on 2*uy.
    ;   small 2*uy : cubic polynomial in uy (Taylor expansion in uy around 0),
    ;                scaled by t_1, plus maxCos*ux*(1-ux)*zi.
    ;   otherwise  : xi*cos(t_0) + yi*sin(t_0); t_1 and the zi term are omitted.
    (FPCore (xi yi zi ux uy maxCos)
     :precision binary32
     ; t_0 is the angle 2*PI*uy, written as uy * (PI + PI).
     (let* ((t_0 (* uy (+ PI PI)))
            ; t_1 is sqrt(1 - maxCos^2 * ux^2 * (1 - ux)^2); the minus sign is
            ; carried by the (ux + -1) factor and the "+ 1" is fused through fma.
            (t_1
             (sqrt
              (fma
               (* maxCos maxCos)
               (* (* ux ux) (* (- 1.0 ux) (+ ux -1.0)))
               1.0))))
       ; Regime boundary: the polynomial branch is used while 2*uy <= 0.03195...
       (if (<= (* 2.0 uy) 0.031950000673532486)
         ; maxCos*ux*(1-ux)*zi + xi*t_1
         ;   + uy*(uy*t_1*(-2*PI^2*xi - (4/3)*PI^3*uy*yi) + 2*t_1*PI*yi)
         (fma
          maxCos
          (* ux (* (- 1.0 ux) zi))
          (fma
           uy
           (fma
            uy
            (*
             t_1
             (fma
              -2.0
              (* xi (* PI PI))
              (* -1.3333333333333333 (* uy (* yi (* PI (* PI PI)))))))
            (* t_1 (* 2.0 (* PI yi))))
           (* xi t_1)))
         ; Large-angle regime: plain trigonometric form without t_1 or zi.
         (fma (cos t_0) xi (* yi (sin t_0))))))
    /*
     * Alternative 5: two-regime single-precision evaluation.
     *
     * When 2*uy is at most 0.03195..., cos/sin of the angle are replaced by a
     * cubic polynomial in uy, scaled by
     * t_1 = sqrt(1 - maxCos^2 * ux^2 * (1-ux)^2), and the zi contribution
     * maxCos*ux*(1-ux)*zi is added. Otherwise the result is
     * xi*cos(t_0) + yi*sin(t_0) with t_0 = uy*(pi+pi); that branch uses
     * neither t_1 nor zi.
     */
    float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
    	const float pi = (float) M_PI;
    	const float one_minus_ux = 1.0f - ux;

    	const float t_0 = uy * (pi + pi);
    	/* (1-ux)*(ux-1) supplies the negative sign; fmaf adds the leading 1. */
    	const float t_1 = sqrtf(fmaf(maxCos * maxCos, (ux * ux) * (one_minus_ux * (ux + -1.0f)), 1.0f));

    	/* Negated comparison so that a NaN uy still lands in the trig branch. */
    	if (!((2.0f * uy) <= 0.031950000673532486f)) {
    		return fmaf(cosf(t_0), xi, yi * sinf(t_0));
    	}

    	/* -2*pi^2*xi - (4/3)*pi^3*uy*yi */
    	const float quad = fmaf(-2.0f, xi * (pi * pi), -1.3333333333333333f * (uy * (yi * (pi * (pi * pi)))));
    	const float slope = fmaf(uy, t_1 * quad, t_1 * (2.0f * (pi * yi)));

    	return fmaf(maxCos, ux * (one_minus_ux * zi), fmaf(uy, slope, xi * t_1));
    }
    
    # Alternative 5: two-regime Float32 evaluation.
    #
    # While 2*uy <= 0.03195..., cos/sin of the angle are replaced by a cubic
    # polynomial in uy, scaled by t_1 = sqrt(1 - maxCos^2 * ux^2 * (1-ux)^2),
    # and the zi contribution is added. Otherwise the result is
    # xi*cos(t_0) + yi*sin(t_0) with t_0 = uy*(pi+pi); that branch uses
    # neither t_1 nor zi.
    function code(xi, yi, zi, ux, uy, maxCos)
    	p = Float32(pi)
    	unit = Float32(1.0)
    	gap = Float32(unit - ux)

    	t_0 = Float32(uy * Float32(p + p))
    	# (1-ux)*(ux-1) supplies the negative sign; fma adds the leading 1.
    	shrink = Float32(Float32(ux * ux) * Float32(gap * Float32(ux + Float32(-1.0))))
    	t_1 = sqrt(fma(Float32(maxCos * maxCos), shrink, unit))

    	# Negated comparison so that a NaN uy still lands in the trig branch.
    	if !(Float32(Float32(2.0) * uy) <= Float32(0.031950000673532486))
    		return fma(cos(t_0), xi, Float32(yi * sin(t_0)))
    	end

    	# -2*pi^2*xi - (4/3)*pi^3*uy*yi
    	cubic = Float32(Float32(-1.3333333333333333) * Float32(uy * Float32(yi * Float32(p * Float32(p * p)))))
    	quad = fma(Float32(-2.0), Float32(xi * Float32(p * p)), cubic)
    	slope = fma(uy, Float32(t_1 * quad), Float32(t_1 * Float32(Float32(2.0) * Float32(p * yi))))

    	return fma(maxCos, Float32(ux * Float32(gap * zi)), fma(uy, slope, Float32(xi * t_1)))
    end
    
    \begin{array}{l}
    
    \\
    \begin{array}{l}
    t_0 := uy \cdot \left(\pi + \pi\right)\\
    t_1 := \sqrt{\mathsf{fma}\left(maxCos \cdot maxCos, \left(ux \cdot ux\right) \cdot \left(\left(1 - ux\right) \cdot \left(ux + -1\right)\right), 1\right)}\\
    \mathbf{if}\;2 \cdot uy \leq 0.031950000673532486:\\
    \;\;\;\;\mathsf{fma}\left(maxCos, ux \cdot \left(\left(1 - ux\right) \cdot zi\right), \mathsf{fma}\left(uy, \mathsf{fma}\left(uy, t\_1 \cdot \mathsf{fma}\left(-2, xi \cdot \left(\pi \cdot \pi\right), -1.3333333333333333 \cdot \left(uy \cdot \left(yi \cdot \left(\pi \cdot \left(\pi \cdot \pi\right)\right)\right)\right)\right), t\_1 \cdot \left(2 \cdot \left(\pi \cdot yi\right)\right)\right), xi \cdot t\_1\right)\right)\\
    
    \mathbf{else}:\\
    \;\;\;\;\mathsf{fma}\left(\cos t\_0, xi, yi \cdot \sin t\_0\right)\\
    
    
    \end{array}
    \end{array}
    
    Derivation
    1. Split input into 2 regimes
    2. if (*.f32 uy #s(literal 2 binary32)) < 0.0319500007

      1. Initial program 99.3%

        \[\left(\left(\cos \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot xi + \left(\sin \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot yi\right) + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
      2. Add Preprocessing
      3. Taylor expanded in zi around inf

        \[\leadsto \color{blue}{maxCos \cdot \left(ux \cdot \left(zi \cdot \left(1 - ux\right)\right)\right)} \]
      4. Step-by-step derivation
        1. lower-*.f32: N/A

          \[\leadsto \color{blue}{maxCos \cdot \left(ux \cdot \left(zi \cdot \left(1 - ux\right)\right)\right)} \]
        2. associate-*r*: N/A

          \[\leadsto maxCos \cdot \color{blue}{\left(\left(ux \cdot zi\right) \cdot \left(1 - ux\right)\right)} \]
        3. lower-*.f32: N/A

          \[\leadsto maxCos \cdot \color{blue}{\left(\left(ux \cdot zi\right) \cdot \left(1 - ux\right)\right)} \]
        4. lower-*.f32: N/A

          \[\leadsto maxCos \cdot \left(\color{blue}{\left(ux \cdot zi\right)} \cdot \left(1 - ux\right)\right) \]
        5. lower--.f32: 15.2%

          \[\leadsto maxCos \cdot \left(\left(ux \cdot zi\right) \cdot \color{blue}{\left(1 - ux\right)}\right) \]
      5. Applied rewrites: 15.2%

        \[\leadsto \color{blue}{maxCos \cdot \left(\left(ux \cdot zi\right) \cdot \left(1 - ux\right)\right)} \]
      6. Taylor expanded in uy around 0

        \[\leadsto \color{blue}{maxCos \cdot \left(ux \cdot \left(zi \cdot \left(1 - ux\right)\right)\right) + \left(uy \cdot \left(2 \cdot \left(\left(yi \cdot \mathsf{PI}\left(\right)\right) \cdot \sqrt{1 - {maxCos}^{2} \cdot \left({ux}^{2} \cdot {\left(1 - ux\right)}^{2}\right)}\right) + uy \cdot \left(-2 \cdot \left(\left(xi \cdot {\mathsf{PI}\left(\right)}^{2}\right) \cdot \sqrt{1 - {maxCos}^{2} \cdot \left({ux}^{2} \cdot {\left(1 - ux\right)}^{2}\right)}\right) + \frac{-4}{3} \cdot \left(\left(uy \cdot \left(yi \cdot {\mathsf{PI}\left(\right)}^{3}\right)\right) \cdot \sqrt{1 - {maxCos}^{2} \cdot \left({ux}^{2} \cdot {\left(1 - ux\right)}^{2}\right)}\right)\right)\right) + xi \cdot \sqrt{1 - {maxCos}^{2} \cdot \left({ux}^{2} \cdot {\left(1 - ux\right)}^{2}\right)}\right)} \]
      7. Applied rewrites: 99.3%

        \[\leadsto \color{blue}{\mathsf{fma}\left(maxCos, ux \cdot \left(\left(1 - ux\right) \cdot zi\right), \mathsf{fma}\left(uy, \mathsf{fma}\left(uy, \sqrt{\mathsf{fma}\left(maxCos \cdot maxCos, \left(ux \cdot ux\right) \cdot \left(-\left(1 - ux\right) \cdot \left(1 - ux\right)\right), 1\right)} \cdot \mathsf{fma}\left(-2, xi \cdot \left(\pi \cdot \pi\right), -1.3333333333333333 \cdot \left(uy \cdot \left(yi \cdot \left(\pi \cdot \left(\pi \cdot \pi\right)\right)\right)\right)\right), \left(2 \cdot \left(yi \cdot \pi\right)\right) \cdot \sqrt{\mathsf{fma}\left(maxCos \cdot maxCos, \left(ux \cdot ux\right) \cdot \left(-\left(1 - ux\right) \cdot \left(1 - ux\right)\right), 1\right)}\right), xi \cdot \sqrt{\mathsf{fma}\left(maxCos \cdot maxCos, \left(ux \cdot ux\right) \cdot \left(-\left(1 - ux\right) \cdot \left(1 - ux\right)\right), 1\right)}\right)\right)} \]

      if 0.0319500007 < (*.f32 uy #s(literal 2 binary32))

      1. Initial program 97.5%

        \[\left(\left(\cos \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot xi + \left(\sin \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot yi\right) + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
      2. Add Preprocessing
      3. Taylor expanded in ux around 0

        \[\leadsto \color{blue}{xi \cdot \cos \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right) + yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)} \]
      4. Step-by-step derivation
        1. lower-fma.f32: N/A

          \[\leadsto \color{blue}{\mathsf{fma}\left(xi, \cos \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right), yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right)} \]
        2. lower-cos.f32: N/A

          \[\leadsto \mathsf{fma}\left(xi, \color{blue}{\cos \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)}, yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
        3. associate-*r*: N/A

          \[\leadsto \mathsf{fma}\left(xi, \cos \color{blue}{\left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right)}, yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
        4. lower-*.f32: N/A

          \[\leadsto \mathsf{fma}\left(xi, \cos \color{blue}{\left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right)}, yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
        5. lower-*.f32: N/A

          \[\leadsto \mathsf{fma}\left(xi, \cos \left(\color{blue}{\left(2 \cdot uy\right)} \cdot \mathsf{PI}\left(\right)\right), yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
        6. lower-PI.f32: N/A

          \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \color{blue}{\mathsf{PI}\left(\right)}\right), yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
        7. lower-*.f32: N/A

          \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), \color{blue}{yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)}\right) \]
        8. lower-sin.f32: N/A

          \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), yi \cdot \color{blue}{\sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)}\right) \]
        9. associate-*r*: N/A

          \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), yi \cdot \sin \color{blue}{\left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right)}\right) \]
        10. lower-*.f32: N/A

          \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), yi \cdot \sin \color{blue}{\left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right)}\right) \]
        11. lower-*.f32: N/A

          \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), yi \cdot \sin \left(\color{blue}{\left(2 \cdot uy\right)} \cdot \mathsf{PI}\left(\right)\right)\right) \]
        12. lower-PI.f32: 91.7%

          \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \pi\right), yi \cdot \sin \left(\left(2 \cdot uy\right) \cdot \color{blue}{\pi}\right)\right) \]
      5. Applied rewrites: 91.7%

        \[\leadsto \color{blue}{\mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \pi\right), yi \cdot \sin \left(\left(2 \cdot uy\right) \cdot \pi\right)\right)} \]
      6. Step-by-step derivation
        1. Applied rewrites: 91.7%

          \[\leadsto \mathsf{fma}\left(\cos \left(uy \cdot \left(\pi + \pi\right)\right), \color{blue}{xi}, yi \cdot \sin \left(uy \cdot \left(\pi + \pi\right)\right)\right) \]
      7. Recombined 2 regimes into one program.
      8. Final simplification: 97.8%

        \[\leadsto \begin{array}{l} \mathbf{if}\;2 \cdot uy \leq 0.031950000673532486:\\ \;\;\;\;\mathsf{fma}\left(maxCos, ux \cdot \left(\left(1 - ux\right) \cdot zi\right), \mathsf{fma}\left(uy, \mathsf{fma}\left(uy, \sqrt{\mathsf{fma}\left(maxCos \cdot maxCos, \left(ux \cdot ux\right) \cdot \left(\left(1 - ux\right) \cdot \left(ux + -1\right)\right), 1\right)} \cdot \mathsf{fma}\left(-2, xi \cdot \left(\pi \cdot \pi\right), -1.3333333333333333 \cdot \left(uy \cdot \left(yi \cdot \left(\pi \cdot \left(\pi \cdot \pi\right)\right)\right)\right)\right), \sqrt{\mathsf{fma}\left(maxCos \cdot maxCos, \left(ux \cdot ux\right) \cdot \left(\left(1 - ux\right) \cdot \left(ux + -1\right)\right), 1\right)} \cdot \left(2 \cdot \left(\pi \cdot yi\right)\right)\right), xi \cdot \sqrt{\mathsf{fma}\left(maxCos \cdot maxCos, \left(ux \cdot ux\right) \cdot \left(\left(1 - ux\right) \cdot \left(ux + -1\right)\right), 1\right)}\right)\right)\\ \mathbf{else}:\\ \;\;\;\;\mathsf{fma}\left(\cos \left(uy \cdot \left(\pi + \pi\right)\right), xi, yi \cdot \sin \left(uy \cdot \left(\pi + \pi\right)\right)\right)\\ \end{array} \]
      9. Add Preprocessing

      Alternative 6: 97.2% accurate, 1.5× speedup

      \[\begin{array}{l} \\ \begin{array}{l} t_0 := uy \cdot \left(\pi + \pi\right)\\ \mathbf{if}\;2 \cdot uy \leq 0.031950000673532486:\\ \;\;\;\;\mathsf{fma}\left(uy, \mathsf{fma}\left(uy, \mathsf{fma}\left(-1.3333333333333333, \left(uy \cdot yi\right) \cdot \left(\pi \cdot \left(\pi \cdot \pi\right)\right), -2 \cdot \left(xi \cdot \left(\pi \cdot \pi\right)\right)\right), 2 \cdot \left(\pi \cdot yi\right)\right), xi\right) + zi \cdot \left(ux \cdot \left(\left(1 - ux\right) \cdot maxCos\right)\right)\\ \mathbf{else}:\\ \;\;\;\;\mathsf{fma}\left(\cos t\_0, xi, yi \cdot \sin t\_0\right)\\ \end{array} \end{array} \]
      ; Alternative 6: two-regime binary32 program, split on 2*uy.
      ;   small 2*uy : cubic polynomial in uy with no sqrt factor (Taylor
      ;                expansion in uy around 0, then in maxCos around 0),
      ;                plus zi*ux*(1-ux)*maxCos.
      ;   otherwise  : xi*cos(t_0) + yi*sin(t_0); the zi term is omitted.
      (FPCore (xi yi zi ux uy maxCos)
       :precision binary32
       ; t_0 is the angle 2*PI*uy, written as uy * (PI + PI).
       (let* ((t_0 (* uy (+ PI PI))))
         ; Regime boundary: the polynomial branch is used while 2*uy <= 0.03195...
         (if (<= (* 2.0 uy) 0.031950000673532486)
           ; xi + uy*(2*PI*yi + uy*(-2*PI^2*xi - (4/3)*PI^3*uy*yi))
           ;    + zi*ux*(1-ux)*maxCos
           (+
            (fma
             uy
             (fma
              uy
              (fma
               -1.3333333333333333
               (* (* uy yi) (* PI (* PI PI)))
               (* -2.0 (* xi (* PI PI))))
              (* 2.0 (* PI yi)))
             xi)
            (* zi (* ux (* (- 1.0 ux) maxCos))))
           ; Large-angle regime: plain trigonometric form without the zi term.
           (fma (cos t_0) xi (* yi (sin t_0))))))
      /*
       * Alternative 6: two-regime single-precision evaluation.
       *
       * When 2*uy is at most 0.03195..., cos/sin of the angle are replaced by
       * a cubic polynomial in uy (no sqrt factor), and the zi contribution
       * zi*ux*(1-ux)*maxCos is added. Otherwise the result is
       * xi*cos(t_0) + yi*sin(t_0) with t_0 = uy*(pi+pi); that branch does not
       * use zi, ux or maxCos.
       */
      float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
      	const float pi = (float) M_PI;
      	const float t_0 = uy * (pi + pi);

      	/* Negated comparison so that a NaN uy still lands in the trig branch. */
      	if (!((2.0f * uy) <= 0.031950000673532486f)) {
      		return fmaf(cosf(t_0), xi, yi * sinf(t_0));
      	}

      	/* -(4/3)*pi^3*uy*yi - 2*pi^2*xi */
      	const float quad = fmaf(-1.3333333333333333f, (uy * yi) * (pi * (pi * pi)), -2.0f * (xi * (pi * pi)));
      	const float slope = fmaf(uy, quad, 2.0f * (pi * yi));

      	/* The product stays in the same expression as the addition, as in the
      	 * generated form, so the operation sequence is unchanged. */
      	return fmaf(uy, slope, xi) + (zi * (ux * ((1.0f - ux) * maxCos)));
      }
      
      # Alternative 6: two-regime Float32 evaluation.
      #
      # While 2*uy <= 0.03195..., cos/sin of the angle are replaced by a cubic
      # polynomial in uy (no sqrt factor), and zi*ux*(1-ux)*maxCos is added.
      # Otherwise the result is xi*cos(t_0) + yi*sin(t_0) with
      # t_0 = uy*(pi+pi); that branch does not use zi, ux or maxCos.
      function code(xi, yi, zi, ux, uy, maxCos)
      	p = Float32(pi)
      	t_0 = Float32(uy * Float32(p + p))

      	# Negated comparison so that a NaN uy still lands in the trig branch.
      	if !(Float32(Float32(2.0) * uy) <= Float32(0.031950000673532486))
      		return fma(cos(t_0), xi, Float32(yi * sin(t_0)))
      	end

      	# -(4/3)*pi^3*uy*yi - 2*pi^2*xi
      	cubic = Float32(Float32(uy * yi) * Float32(p * Float32(p * p)))
      	quad = fma(Float32(-1.3333333333333333), cubic, Float32(Float32(-2.0) * Float32(xi * Float32(p * p))))
      	slope = fma(uy, quad, Float32(Float32(2.0) * Float32(p * yi)))

      	z_part = Float32(zi * Float32(ux * Float32(Float32(Float32(1.0) - ux) * maxCos)))
      	return Float32(fma(uy, slope, xi) + z_part)
      end
      
      \begin{array}{l}
      
      \\
      \begin{array}{l}
      t_0 := uy \cdot \left(\pi + \pi\right)\\
      \mathbf{if}\;2 \cdot uy \leq 0.031950000673532486:\\
      \;\;\;\;\mathsf{fma}\left(uy, \mathsf{fma}\left(uy, \mathsf{fma}\left(-1.3333333333333333, \left(uy \cdot yi\right) \cdot \left(\pi \cdot \left(\pi \cdot \pi\right)\right), -2 \cdot \left(xi \cdot \left(\pi \cdot \pi\right)\right)\right), 2 \cdot \left(\pi \cdot yi\right)\right), xi\right) + zi \cdot \left(ux \cdot \left(\left(1 - ux\right) \cdot maxCos\right)\right)\\
      
      \mathbf{else}:\\
      \;\;\;\;\mathsf{fma}\left(\cos t\_0, xi, yi \cdot \sin t\_0\right)\\
      
      
      \end{array}
      \end{array}
      
      Derivation
      1. Split input into 2 regimes
      2. if (*.f32 uy #s(literal 2 binary32)) < 0.0319500007

        1. Initial program 99.3%

          \[\left(\left(\cos \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot xi + \left(\sin \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot yi\right) + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
        2. Add Preprocessing
        3. Taylor expanded in uy around 0

          \[\leadsto \color{blue}{\left(uy \cdot \left(2 \cdot \left(\left(yi \cdot \mathsf{PI}\left(\right)\right) \cdot \sqrt{1 - {maxCos}^{2} \cdot \left({ux}^{2} \cdot {\left(1 - ux\right)}^{2}\right)}\right) + uy \cdot \left(-2 \cdot \left(\left(xi \cdot {\mathsf{PI}\left(\right)}^{2}\right) \cdot \sqrt{1 - {maxCos}^{2} \cdot \left({ux}^{2} \cdot {\left(1 - ux\right)}^{2}\right)}\right) + \frac{-4}{3} \cdot \left(\left(uy \cdot \left(yi \cdot {\mathsf{PI}\left(\right)}^{3}\right)\right) \cdot \sqrt{1 - {maxCos}^{2} \cdot \left({ux}^{2} \cdot {\left(1 - ux\right)}^{2}\right)}\right)\right)\right) + xi \cdot \sqrt{1 - {maxCos}^{2} \cdot \left({ux}^{2} \cdot {\left(1 - ux\right)}^{2}\right)}\right)} + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
        4. Applied rewrites: 99.2%

          \[\leadsto \color{blue}{\mathsf{fma}\left(xi, \sqrt{\mathsf{fma}\left(maxCos \cdot maxCos, \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right) \cdot \left(ux \cdot \left(-ux\right)\right), 1\right)}, uy \cdot \mathsf{fma}\left(uy, \sqrt{\mathsf{fma}\left(maxCos \cdot maxCos, \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right) \cdot \left(ux \cdot \left(-ux\right)\right), 1\right)} \cdot \mathsf{fma}\left(-1.3333333333333333, uy \cdot \left(yi \cdot \left(\pi \cdot \left(\pi \cdot \pi\right)\right)\right), -2 \cdot \left(xi \cdot \left(\pi \cdot \pi\right)\right)\right), \left(2 \cdot \left(yi \cdot \pi\right)\right) \cdot \sqrt{\mathsf{fma}\left(maxCos \cdot maxCos, \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right) \cdot \left(ux \cdot \left(-ux\right)\right), 1\right)}\right)\right)} + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
        5. Taylor expanded in maxCos around 0

          \[\leadsto \left(xi + \color{blue}{uy \cdot \left(2 \cdot \left(yi \cdot \mathsf{PI}\left(\right)\right) + uy \cdot \left(-2 \cdot \left(xi \cdot {\mathsf{PI}\left(\right)}^{2}\right) + \frac{-4}{3} \cdot \left(uy \cdot \left(yi \cdot {\mathsf{PI}\left(\right)}^{3}\right)\right)\right)\right)}\right) + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
        6. Step-by-step derivation
          1. Applied rewrites: 99.1%

            \[\leadsto \mathsf{fma}\left(uy, \color{blue}{\mathsf{fma}\left(uy, \mathsf{fma}\left(-1.3333333333333333, \left(uy \cdot yi\right) \cdot \left(\pi \cdot \left(\pi \cdot \pi\right)\right), -2 \cdot \left(xi \cdot \left(\pi \cdot \pi\right)\right)\right), 2 \cdot \left(yi \cdot \pi\right)\right)}, xi\right) + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]

          if 0.0319500007 < (*.f32 uy #s(literal 2 binary32))

          1. Initial program 97.5%

            \[\left(\left(\cos \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot xi + \left(\sin \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot yi\right) + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
          2. Add Preprocessing
          3. Taylor expanded in ux around 0

            \[\leadsto \color{blue}{xi \cdot \cos \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right) + yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)} \]
          4. Step-by-step derivation
            1. lower-fma.f32: N/A

              \[\leadsto \color{blue}{\mathsf{fma}\left(xi, \cos \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right), yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right)} \]
            2. lower-cos.f32: N/A

              \[\leadsto \mathsf{fma}\left(xi, \color{blue}{\cos \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)}, yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
            3. associate-*r*: N/A

              \[\leadsto \mathsf{fma}\left(xi, \cos \color{blue}{\left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right)}, yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
            4. lower-*.f32: N/A

              \[\leadsto \mathsf{fma}\left(xi, \cos \color{blue}{\left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right)}, yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
            5. lower-*.f32: N/A

              \[\leadsto \mathsf{fma}\left(xi, \cos \left(\color{blue}{\left(2 \cdot uy\right)} \cdot \mathsf{PI}\left(\right)\right), yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
            6. lower-PI.f32: N/A

              \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \color{blue}{\mathsf{PI}\left(\right)}\right), yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
            7. lower-*.f32: N/A

              \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), \color{blue}{yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)}\right) \]
            8. lower-sin.f32: N/A

              \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), yi \cdot \color{blue}{\sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)}\right) \]
            9. associate-*r*: N/A

              \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), yi \cdot \sin \color{blue}{\left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right)}\right) \]
            10. lower-*.f32: N/A

              \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), yi \cdot \sin \color{blue}{\left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right)}\right) \]
            11. lower-*.f32: N/A

              \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), yi \cdot \sin \left(\color{blue}{\left(2 \cdot uy\right)} \cdot \mathsf{PI}\left(\right)\right)\right) \]
            12. lower-PI.f32: 91.7

              \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \pi\right), yi \cdot \sin \left(\left(2 \cdot uy\right) \cdot \color{blue}{\pi}\right)\right) \]
          5. Applied rewrites: 91.7%

            \[\leadsto \color{blue}{\mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \pi\right), yi \cdot \sin \left(\left(2 \cdot uy\right) \cdot \pi\right)\right)} \]
          6. Step-by-step derivation
            1. Applied rewrites: 91.7%

              \[\leadsto \mathsf{fma}\left(\cos \left(uy \cdot \left(\pi + \pi\right)\right), \color{blue}{xi}, yi \cdot \sin \left(uy \cdot \left(\pi + \pi\right)\right)\right) \]
          7. Recombined 2 regimes into one program.
          8. Final simplification: 97.7%

            \[\leadsto \begin{array}{l} \mathbf{if}\;2 \cdot uy \leq 0.031950000673532486:\\ \;\;\;\;\mathsf{fma}\left(uy, \mathsf{fma}\left(uy, \mathsf{fma}\left(-1.3333333333333333, \left(uy \cdot yi\right) \cdot \left(\pi \cdot \left(\pi \cdot \pi\right)\right), -2 \cdot \left(xi \cdot \left(\pi \cdot \pi\right)\right)\right), 2 \cdot \left(\pi \cdot yi\right)\right), xi\right) + zi \cdot \left(ux \cdot \left(\left(1 - ux\right) \cdot maxCos\right)\right)\\ \mathbf{else}:\\ \;\;\;\;\mathsf{fma}\left(\cos \left(uy \cdot \left(\pi + \pi\right)\right), xi, yi \cdot \sin \left(uy \cdot \left(\pi + \pi\right)\right)\right)\\ \end{array} \]
          9. Add Preprocessing

          Alternative 7: 89.5% accurate, 1.6× speedup?

          \[\begin{array}{l} \\ zi \cdot \mathsf{fma}\left(\sqrt{\mathsf{fma}\left(maxCos \cdot maxCos, \left(ux \cdot ux\right) \cdot \left(\left(1 - ux\right) \cdot \left(ux + -1\right)\right), 1\right)}, \mathsf{fma}\left(\cos \left(\pi \cdot \left(2 \cdot uy\right)\right), \frac{xi}{zi}, \frac{2 \cdot \left(uy \cdot \left(\pi \cdot yi\right)\right)}{zi}\right), maxCos \cdot \left(ux \cdot \left(1 - ux\right)\right)\right) \end{array} \]
          ; Alternative 7 in FPCore form, evaluated in binary32.
          ; The whole expression is factored as zi * (...); the sine term of the
          ; original program appears here as its first-order form 2*uy*PI*yi.
          ; Both inner terms divide by zi, so zi = 0 is not handled by this form.
          (FPCore (xi yi zi ux uy maxCos)
           :precision binary32
           (*
            zi
            (fma
             ; sqrt(1 - maxCos^2 * ux^2 * (1 - ux)^2); the minus sign is folded
             ; into the factor (ux + -1).
             (sqrt (fma (* maxCos maxCos) (* (* ux ux) (* (- 1.0 ux) (+ ux -1.0))) 1.0))
             ; cos(2*PI*uy) * (xi / zi) + (2*uy*PI*yi) / zi
             (fma (cos (* PI (* 2.0 uy))) (/ xi zi) (/ (* 2.0 (* uy (* PI yi))) zi))
             ; maxCos * ux * (1 - ux): the coefficient that multiplies zi.
             (* maxCos (* ux (- 1.0 ux))))))
          /*
           * Alternative 7, C rendering (all arithmetic in float).
           *
           * Computes zi * fma(radial, planar, axial) where
           *   radial = sqrt(1 - maxCos^2 * ux^2 * (1 - ux)^2)
           *   planar = cos(2*pi*uy) * xi/zi + (2*uy*pi*yi)/zi
           *   axial  = maxCos * ux * (1 - ux)
           * The planar part divides by zi, so zi == 0 is not handled.
           */
          float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
          	const float pi_f = (float) M_PI;
          	const float one_minus_ux = 1.0f - ux;

          	/* 1 + maxCos^2 * (ux^2 * ((1 - ux) * (ux - 1))) == 1 - (maxCos*ux*(1-ux))^2 */
          	const float radicand = fmaf(maxCos * maxCos, (ux * ux) * (one_minus_ux * (ux + -1.0f)), 1.0f);
          	const float radial = sqrtf(radicand);

          	/* Cosine is evaluated directly; the sine is replaced by its linear term. */
          	const float cos_part = cosf(pi_f * (2.0f * uy));
          	const float sin_part = (2.0f * (uy * (pi_f * yi))) / zi;
          	const float planar = fmaf(cos_part, xi / zi, sin_part);

          	const float axial = maxCos * (ux * one_minus_ux);
          	return zi * fmaf(radial, planar, axial);
          }
          
          # Alternative 7, Julia rendering. Every intermediate is rounded to Float32.
          # Result: zi * fma(radial, planar, axial); the planar part divides by zi,
          # so zi == 0 is not handled.
          function code(xi, yi, zi, ux, uy, maxCos)
          	pi32 = Float32(pi)
          	one_minus_ux = Float32(Float32(1.0) - ux)
          	# 1 + maxCos^2 * (ux^2 * ((1 - ux) * (ux - 1)))
          	radicand = fma(Float32(maxCos * maxCos), Float32(Float32(ux * ux) * Float32(one_minus_ux * Float32(ux + Float32(-1.0)))), Float32(1.0))
          	# cos(2*pi*uy) * xi/zi + (2*uy*pi*yi)/zi
          	cos_part = cos(Float32(pi32 * Float32(Float32(2.0) * uy)))
          	sin_part = Float32(Float32(Float32(2.0) * Float32(uy * Float32(pi32 * yi))) / zi)
          	planar = fma(cos_part, Float32(xi / zi), sin_part)
          	axial = Float32(maxCos * Float32(ux * one_minus_ux))
          	return Float32(zi * fma(sqrt(radicand), planar, axial))
          end
          
          \begin{array}{l}
          
          \\
          zi \cdot \mathsf{fma}\left(\sqrt{\mathsf{fma}\left(maxCos \cdot maxCos, \left(ux \cdot ux\right) \cdot \left(\left(1 - ux\right) \cdot \left(ux + -1\right)\right), 1\right)}, \mathsf{fma}\left(\cos \left(\pi \cdot \left(2 \cdot uy\right)\right), \frac{xi}{zi}, \frac{2 \cdot \left(uy \cdot \left(\pi \cdot yi\right)\right)}{zi}\right), maxCos \cdot \left(ux \cdot \left(1 - ux\right)\right)\right)
          \end{array}
          
          Derivation
          1. Initial program 99.0%

            \[\left(\left(\cos \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot xi + \left(\sin \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot yi\right) + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
          2. Add Preprocessing
          3. Taylor expanded in zi around inf

            \[\leadsto \color{blue}{zi \cdot \left(maxCos \cdot \left(ux \cdot \left(1 - ux\right)\right) + \left(\frac{xi \cdot \cos \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)}{zi} \cdot \sqrt{1 - {maxCos}^{2} \cdot \left({ux}^{2} \cdot {\left(1 - ux\right)}^{2}\right)} + \frac{yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)}{zi} \cdot \sqrt{1 - {maxCos}^{2} \cdot \left({ux}^{2} \cdot {\left(1 - ux\right)}^{2}\right)}\right)\right)} \]
          4. Applied rewrites: 98.3%

            \[\leadsto \color{blue}{zi \cdot \mathsf{fma}\left(\sqrt{\mathsf{fma}\left(maxCos \cdot maxCos, \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right) \cdot \left(ux \cdot \left(-ux\right)\right), 1\right)}, \mathsf{fma}\left(\cos \left(\left(2 \cdot uy\right) \cdot \pi\right), \frac{xi}{zi}, \sin \left(\left(2 \cdot uy\right) \cdot \pi\right) \cdot \frac{yi}{zi}\right), maxCos \cdot \left(ux \cdot \left(1 - ux\right)\right)\right)} \]
          5. Taylor expanded in uy around 0

            \[\leadsto zi \cdot \mathsf{fma}\left(\sqrt{\mathsf{fma}\left(maxCos \cdot maxCos, \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right) \cdot \left(ux \cdot \left(\mathsf{neg}\left(ux\right)\right)\right), 1\right)}, \mathsf{fma}\left(\cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), \frac{xi}{zi}, 2 \cdot \frac{uy \cdot \left(yi \cdot \mathsf{PI}\left(\right)\right)}{zi}\right), maxCos \cdot \left(ux \cdot \left(1 - ux\right)\right)\right) \]
          6. Step-by-step derivation
            1. Applied rewrites: 90.3%

              \[\leadsto zi \cdot \mathsf{fma}\left(\sqrt{\mathsf{fma}\left(maxCos \cdot maxCos, \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right) \cdot \left(ux \cdot \left(-ux\right)\right), 1\right)}, \mathsf{fma}\left(\cos \left(\left(2 \cdot uy\right) \cdot \pi\right), \frac{xi}{zi}, \frac{2 \cdot \left(uy \cdot \left(yi \cdot \pi\right)\right)}{zi}\right), maxCos \cdot \left(ux \cdot \left(1 - ux\right)\right)\right) \]
            2. Final simplification: 90.3%

              \[\leadsto zi \cdot \mathsf{fma}\left(\sqrt{\mathsf{fma}\left(maxCos \cdot maxCos, \left(ux \cdot ux\right) \cdot \left(\left(1 - ux\right) \cdot \left(ux + -1\right)\right), 1\right)}, \mathsf{fma}\left(\cos \left(\pi \cdot \left(2 \cdot uy\right)\right), \frac{xi}{zi}, \frac{2 \cdot \left(uy \cdot \left(\pi \cdot yi\right)\right)}{zi}\right), maxCos \cdot \left(ux \cdot \left(1 - ux\right)\right)\right) \]
            3. Add Preprocessing

            Alternative 8: 92.4% accurate, 2.1× speedup?

            \[\begin{array}{l} \\ \begin{array}{l} t_0 := \pi \cdot \left(\pi \cdot \pi\right)\\ \mathbf{if}\;2 \cdot uy \leq 0.031950000673532486:\\ \;\;\;\;\mathsf{fma}\left(uy, \mathsf{fma}\left(uy, \mathsf{fma}\left(-1.3333333333333333, \left(uy \cdot yi\right) \cdot t\_0, -2 \cdot \left(xi \cdot \left(\pi \cdot \pi\right)\right)\right), 2 \cdot \left(\pi \cdot yi\right)\right), xi\right) + zi \cdot \left(ux \cdot \left(\left(1 - ux\right) \cdot maxCos\right)\right)\\ \mathbf{else}:\\ \;\;\;\;\mathsf{fma}\left(xi, \cos \left(\pi \cdot \left(2 \cdot uy\right)\right), yi \cdot \left(uy \cdot \mathsf{fma}\left(-1.3333333333333333 \cdot \left(uy \cdot uy\right), t\_0, 2 \cdot \pi\right)\right)\right)\\ \end{array} \end{array} \]
            ; Alternative 8 in FPCore form, evaluated in binary32.
            ; Two regimes selected on 2*uy:
            ;   - small 2*uy: cubic polynomial in uy for the xi/yi part, plus the
            ;     zi term zi * ux * (1 - ux) * maxCos;
            ;   - otherwise: exact cosine for xi, cubic polynomial in uy in place of
            ;     the sine for yi; zi, ux and maxCos are not used in this branch.
            (FPCore (xi yi zi ux uy maxCos)
             :precision binary32
             ; t_0 = PI^3, shared by both branches.
             (let* ((t_0 (* PI (* PI PI))))
               (if (<= (* 2.0 uy) 0.031950000673532486)
                 (+
                  ; xi + uy*(2*PI*yi + uy*(-2*PI^2*xi - 4/3*uy*yi*PI^3))
                  (fma
                   uy
                   (fma
                    uy
                    (fma -1.3333333333333333 (* (* uy yi) t_0) (* -2.0 (* xi (* PI PI))))
                    (* 2.0 (* PI yi)))
                   xi)
                  (* zi (* ux (* (- 1.0 ux) maxCos))))
                 ; xi*cos(2*PI*uy) + yi*uy*(2*PI - 4/3*uy^2*PI^3)
                 (fma
                  xi
                  (cos (* PI (* 2.0 uy)))
                  (* yi (* uy (fma (* -1.3333333333333333 (* uy uy)) t_0 (* 2.0 PI))))))))
            /*
             * Alternative 8, C rendering (all arithmetic in float).
             *
             * If 2*uy <= 0.03195: cubic polynomial in uy for the xi/yi part plus the
             * zi term zi * ux * (1 - ux) * maxCos.
             * Otherwise: xi * cos(2*pi*uy) + yi * (cubic polynomial in uy standing in
             * for the sine); zi, ux and maxCos are not read on that path.
             */
            float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
            	const float pi_f = (float) M_PI;
            	const float pi_sq = pi_f * pi_f;
            	const float pi_cubed = pi_f * pi_sq;

            	if ((2.0f * uy) <= 0.031950000673532486f) {
            		/* xi + uy*(2*pi*yi + uy*(-2*pi^2*xi - 4/3*uy*yi*pi^3)) */
            		const float cubic = fmaf(-1.3333333333333333f, (uy * yi) * pi_cubed, -2.0f * (xi * pi_sq));
            		const float quad = fmaf(uy, cubic, 2.0f * (pi_f * yi));
            		const float planar = fmaf(uy, quad, xi);
            		const float axial_coeff = ux * ((1.0f - ux) * maxCos);
            		return planar + (zi * axial_coeff);
            	}

            	/* uy * (2*pi - 4/3*uy^2*pi^3) approximates sin(2*pi*uy). */
            	const float sin_poly = fmaf(-1.3333333333333333f * (uy * uy), pi_cubed, 2.0f * pi_f);
            	return fmaf(xi, cosf(pi_f * (2.0f * uy)), yi * (uy * sin_poly));
            }
            
            # Alternative 8, Julia rendering. Every intermediate is rounded to Float32.
            # Small 2*uy: cubic polynomial in uy plus the zi term.
            # Otherwise: xi*cos(2*pi*uy) + yi*(cubic polynomial in uy); zi, ux and
            # maxCos are not read on that path.
            function code(xi, yi, zi, ux, uy, maxCos)
            	pi32 = Float32(pi)
            	pi_sq = Float32(pi32 * pi32)
            	pi_cubed = Float32(pi32 * pi_sq)
            	if Float32(Float32(2.0) * uy) <= Float32(0.031950000673532486)
            		# xi + uy*(2*pi*yi + uy*(-2*pi^2*xi - 4/3*uy*yi*pi^3))
            		cubic = fma(Float32(-1.3333333333333333), Float32(Float32(uy * yi) * pi_cubed), Float32(Float32(-2.0) * Float32(xi * pi_sq)))
            		quad = fma(uy, cubic, Float32(Float32(2.0) * Float32(pi32 * yi)))
            		planar = fma(uy, quad, xi)
            		axial = Float32(zi * Float32(ux * Float32(Float32(Float32(1.0) - ux) * maxCos)))
            		return Float32(planar + axial)
            	end
            	# uy * (2*pi - 4/3*uy^2*pi^3) stands in for sin(2*pi*uy).
            	sin_poly = fma(Float32(Float32(-1.3333333333333333) * Float32(uy * uy)), pi_cubed, Float32(Float32(2.0) * pi32))
            	return fma(xi, cos(Float32(pi32 * Float32(Float32(2.0) * uy))), Float32(yi * Float32(uy * sin_poly)))
            end
            
            \begin{array}{l}
            
            \\
            \begin{array}{l}
            t_0 := \pi \cdot \left(\pi \cdot \pi\right)\\
            \mathbf{if}\;2 \cdot uy \leq 0.031950000673532486:\\
            \;\;\;\;\mathsf{fma}\left(uy, \mathsf{fma}\left(uy, \mathsf{fma}\left(-1.3333333333333333, \left(uy \cdot yi\right) \cdot t\_0, -2 \cdot \left(xi \cdot \left(\pi \cdot \pi\right)\right)\right), 2 \cdot \left(\pi \cdot yi\right)\right), xi\right) + zi \cdot \left(ux \cdot \left(\left(1 - ux\right) \cdot maxCos\right)\right)\\
            
            \mathbf{else}:\\
            \;\;\;\;\mathsf{fma}\left(xi, \cos \left(\pi \cdot \left(2 \cdot uy\right)\right), yi \cdot \left(uy \cdot \mathsf{fma}\left(-1.3333333333333333 \cdot \left(uy \cdot uy\right), t\_0, 2 \cdot \pi\right)\right)\right)\\
            
            
            \end{array}
            \end{array}
            
            Derivation
            1. Split input into 2 regimes
            2. if (*.f32 uy #s(literal 2 binary32)) < 0.0319500007

              1. Initial program 99.3%

                \[\left(\left(\cos \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot xi + \left(\sin \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot yi\right) + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
              2. Add Preprocessing
              3. Taylor expanded in uy around 0

                \[\leadsto \color{blue}{\left(uy \cdot \left(2 \cdot \left(\left(yi \cdot \mathsf{PI}\left(\right)\right) \cdot \sqrt{1 - {maxCos}^{2} \cdot \left({ux}^{2} \cdot {\left(1 - ux\right)}^{2}\right)}\right) + uy \cdot \left(-2 \cdot \left(\left(xi \cdot {\mathsf{PI}\left(\right)}^{2}\right) \cdot \sqrt{1 - {maxCos}^{2} \cdot \left({ux}^{2} \cdot {\left(1 - ux\right)}^{2}\right)}\right) + \frac{-4}{3} \cdot \left(\left(uy \cdot \left(yi \cdot {\mathsf{PI}\left(\right)}^{3}\right)\right) \cdot \sqrt{1 - {maxCos}^{2} \cdot \left({ux}^{2} \cdot {\left(1 - ux\right)}^{2}\right)}\right)\right)\right) + xi \cdot \sqrt{1 - {maxCos}^{2} \cdot \left({ux}^{2} \cdot {\left(1 - ux\right)}^{2}\right)}\right)} + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
              4. Applied rewrites: 99.2%

                \[\leadsto \color{blue}{\mathsf{fma}\left(xi, \sqrt{\mathsf{fma}\left(maxCos \cdot maxCos, \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right) \cdot \left(ux \cdot \left(-ux\right)\right), 1\right)}, uy \cdot \mathsf{fma}\left(uy, \sqrt{\mathsf{fma}\left(maxCos \cdot maxCos, \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right) \cdot \left(ux \cdot \left(-ux\right)\right), 1\right)} \cdot \mathsf{fma}\left(-1.3333333333333333, uy \cdot \left(yi \cdot \left(\pi \cdot \left(\pi \cdot \pi\right)\right)\right), -2 \cdot \left(xi \cdot \left(\pi \cdot \pi\right)\right)\right), \left(2 \cdot \left(yi \cdot \pi\right)\right) \cdot \sqrt{\mathsf{fma}\left(maxCos \cdot maxCos, \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right) \cdot \left(ux \cdot \left(-ux\right)\right), 1\right)}\right)\right)} + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
              5. Taylor expanded in maxCos around 0

                \[\leadsto \left(xi + \color{blue}{uy \cdot \left(2 \cdot \left(yi \cdot \mathsf{PI}\left(\right)\right) + uy \cdot \left(-2 \cdot \left(xi \cdot {\mathsf{PI}\left(\right)}^{2}\right) + \frac{-4}{3} \cdot \left(uy \cdot \left(yi \cdot {\mathsf{PI}\left(\right)}^{3}\right)\right)\right)\right)}\right) + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
              6. Step-by-step derivation
                1. Applied rewrites: 99.1%

                  \[\leadsto \mathsf{fma}\left(uy, \color{blue}{\mathsf{fma}\left(uy, \mathsf{fma}\left(-1.3333333333333333, \left(uy \cdot yi\right) \cdot \left(\pi \cdot \left(\pi \cdot \pi\right)\right), -2 \cdot \left(xi \cdot \left(\pi \cdot \pi\right)\right)\right), 2 \cdot \left(yi \cdot \pi\right)\right)}, xi\right) + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]

                if 0.0319500007 < (*.f32 uy #s(literal 2 binary32))

                1. Initial program 97.5%

                  \[\left(\left(\cos \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot xi + \left(\sin \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot yi\right) + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
                2. Add Preprocessing
                3. Taylor expanded in ux around 0

                  \[\leadsto \color{blue}{xi \cdot \cos \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right) + yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)} \]
                4. Step-by-step derivation
                  1. lower-fma.f32: N/A

                    \[\leadsto \color{blue}{\mathsf{fma}\left(xi, \cos \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right), yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right)} \]
                  2. lower-cos.f32: N/A

                    \[\leadsto \mathsf{fma}\left(xi, \color{blue}{\cos \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)}, yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
                  3. associate-*r*: N/A

                    \[\leadsto \mathsf{fma}\left(xi, \cos \color{blue}{\left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right)}, yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
                  4. lower-*.f32: N/A

                    \[\leadsto \mathsf{fma}\left(xi, \cos \color{blue}{\left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right)}, yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
                  5. lower-*.f32: N/A

                    \[\leadsto \mathsf{fma}\left(xi, \cos \left(\color{blue}{\left(2 \cdot uy\right)} \cdot \mathsf{PI}\left(\right)\right), yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
                  6. lower-PI.f32: N/A

                    \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \color{blue}{\mathsf{PI}\left(\right)}\right), yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
                  7. lower-*.f32: N/A

                    \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), \color{blue}{yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)}\right) \]
                  8. lower-sin.f32: N/A

                    \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), yi \cdot \color{blue}{\sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)}\right) \]
                  9. associate-*r*: N/A

                    \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), yi \cdot \sin \color{blue}{\left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right)}\right) \]
                  10. lower-*.f32: N/A

                    \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), yi \cdot \sin \color{blue}{\left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right)}\right) \]
                  11. lower-*.f32: N/A

                    \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), yi \cdot \sin \left(\color{blue}{\left(2 \cdot uy\right)} \cdot \mathsf{PI}\left(\right)\right)\right) \]
                  12. lower-PI.f32: 91.7

                    \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \pi\right), yi \cdot \sin \left(\left(2 \cdot uy\right) \cdot \color{blue}{\pi}\right)\right) \]
                5. Applied rewrites: 91.7%

                  \[\leadsto \color{blue}{\mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \pi\right), yi \cdot \sin \left(\left(2 \cdot uy\right) \cdot \pi\right)\right)} \]
                6. Taylor expanded in uy around 0

                  \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), yi \cdot \left(uy \cdot \left(\frac{-4}{3} \cdot \left({uy}^{2} \cdot {\mathsf{PI}\left(\right)}^{3}\right) + 2 \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
                7. Step-by-step derivation
                  1. Applied rewrites: 68.4%

                    \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \pi\right), yi \cdot \left(uy \cdot \mathsf{fma}\left(-1.3333333333333333 \cdot \left(uy \cdot uy\right), \pi \cdot \left(\pi \cdot \pi\right), 2 \cdot \pi\right)\right)\right) \]
                8. Recombined 2 regimes into one program.
                9. Final simplification: 93.1%

                  \[\leadsto \begin{array}{l} \mathbf{if}\;2 \cdot uy \leq 0.031950000673532486:\\ \;\;\;\;\mathsf{fma}\left(uy, \mathsf{fma}\left(uy, \mathsf{fma}\left(-1.3333333333333333, \left(uy \cdot yi\right) \cdot \left(\pi \cdot \left(\pi \cdot \pi\right)\right), -2 \cdot \left(xi \cdot \left(\pi \cdot \pi\right)\right)\right), 2 \cdot \left(\pi \cdot yi\right)\right), xi\right) + zi \cdot \left(ux \cdot \left(\left(1 - ux\right) \cdot maxCos\right)\right)\\ \mathbf{else}:\\ \;\;\;\;\mathsf{fma}\left(xi, \cos \left(\pi \cdot \left(2 \cdot uy\right)\right), yi \cdot \left(uy \cdot \mathsf{fma}\left(-1.3333333333333333 \cdot \left(uy \cdot uy\right), \pi \cdot \left(\pi \cdot \pi\right), 2 \cdot \pi\right)\right)\right)\\ \end{array} \]
                10. Add Preprocessing

                Alternative 9: 86.7% accurate, 3.8× speedup?

                \[\begin{array}{l} \\ \begin{array}{l} \mathbf{if}\;2 \cdot uy \leq 0.0006000000284984708:\\ \;\;\;\;\mathsf{fma}\left(\sqrt{\mathsf{fma}\left(maxCos \cdot maxCos, \left(ux \cdot ux\right) \cdot \left(\left(1 - ux\right) \cdot \left(ux + -1\right)\right), 1\right)}, \mathsf{fma}\left(2, uy \cdot \left(\pi \cdot yi\right), xi\right), maxCos \cdot \left(ux \cdot \left(\left(1 - ux\right) \cdot zi\right)\right)\right)\\ \mathbf{else}:\\ \;\;\;\;\mathsf{fma}\left(uy, \mathsf{fma}\left(uy, \mathsf{fma}\left(-2, xi \cdot \left(\pi \cdot \pi\right), -1.3333333333333333 \cdot \left(\left(uy \cdot yi\right) \cdot \left(\pi \cdot \left(\pi \cdot \pi\right)\right)\right)\right), 2 \cdot \left(\pi \cdot yi\right)\right), xi\right)\\ \end{array} \end{array} \]
                ; Alternative 9 in FPCore form, evaluated in binary32.
                ; Two regimes selected on 2*uy:
                ;   - small 2*uy: sqrt factor times (xi + 2*uy*PI*yi), plus the zi term;
                ;   - otherwise: cubic polynomial in uy only; zi, ux and maxCos are
                ;     not used in this branch.
                (FPCore (xi yi zi ux uy maxCos)
                 :precision binary32
                 (if (<= (* 2.0 uy) 0.0006000000284984708)
                   (fma
                    ; sqrt(1 - maxCos^2 * ux^2 * (1 - ux)^2); the minus sign is folded
                    ; into the factor (ux + -1).
                    (sqrt (fma (* maxCos maxCos) (* (* ux ux) (* (- 1.0 ux) (+ ux -1.0))) 1.0))
                    ; 2*uy*PI*yi + xi
                    (fma 2.0 (* uy (* PI yi)) xi)
                    ; maxCos * ux * (1 - ux) * zi
                    (* maxCos (* ux (* (- 1.0 ux) zi))))
                   ; xi + uy*(2*PI*yi + uy*(-2*PI^2*xi - 4/3*uy*yi*PI^3))
                   (fma
                    uy
                    (fma
                     uy
                     (fma
                      -2.0
                      (* xi (* PI PI))
                      (* -1.3333333333333333 (* (* uy yi) (* PI (* PI PI)))))
                     (* 2.0 (* PI yi)))
                    xi)))
                /*
                 * Alternative 9, C rendering (all arithmetic in float).
                 *
                 * If 2*uy <= 0.0006: sqrt(1 - (maxCos*ux*(1-ux))^2) * (xi + 2*uy*pi*yi)
                 * plus maxCos * ux * (1 - ux) * zi.
                 * Otherwise: a cubic polynomial in uy built from xi and yi only; zi, ux
                 * and maxCos are not read on that path.
                 */
                float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
                	const float pi_f = (float) M_PI;

                	if ((2.0f * uy) <= 0.0006000000284984708f) {
                		const float one_minus_ux = 1.0f - ux;
                		/* 1 + maxCos^2 * (ux^2 * ((1 - ux) * (ux - 1))) */
                		const float radicand = fmaf(maxCos * maxCos, (ux * ux) * (one_minus_ux * (ux + -1.0f)), 1.0f);
                		const float planar = fmaf(2.0f, uy * (pi_f * yi), xi);
                		const float axial = maxCos * (ux * (one_minus_ux * zi));
                		return fmaf(sqrtf(radicand), planar, axial);
                	}

                	/* xi + uy*(2*pi*yi + uy*(-2*pi^2*xi - 4/3*uy*yi*pi^3)) */
                	const float pi_sq = pi_f * pi_f;
                	const float cubic = fmaf(-2.0f, xi * pi_sq, -1.3333333333333333f * ((uy * yi) * (pi_f * pi_sq)));
                	const float quad = fmaf(uy, cubic, 2.0f * (pi_f * yi));
                	return fmaf(uy, quad, xi);
                }
                
                # Alternative 9, Julia rendering. Every intermediate is rounded to Float32.
                # Small 2*uy: sqrt factor times (xi + 2*uy*pi*yi), plus the zi term.
                # Otherwise: cubic polynomial in uy from xi and yi only; zi, ux and
                # maxCos are not read on that path.
                function code(xi, yi, zi, ux, uy, maxCos)
                	pi32 = Float32(pi)
                	if Float32(Float32(2.0) * uy) <= Float32(0.0006000000284984708)
                		one_minus_ux = Float32(Float32(1.0) - ux)
                		# 1 + maxCos^2 * (ux^2 * ((1 - ux) * (ux - 1)))
                		radicand = fma(Float32(maxCos * maxCos), Float32(Float32(ux * ux) * Float32(one_minus_ux * Float32(ux + Float32(-1.0)))), Float32(1.0))
                		planar = fma(Float32(2.0), Float32(uy * Float32(pi32 * yi)), xi)
                		axial = Float32(maxCos * Float32(ux * Float32(one_minus_ux * zi)))
                		return fma(sqrt(radicand), planar, axial)
                	end
                	# xi + uy*(2*pi*yi + uy*(-2*pi^2*xi - 4/3*uy*yi*pi^3))
                	pi_sq = Float32(pi32 * pi32)
                	cubic = fma(Float32(-2.0), Float32(xi * pi_sq), Float32(Float32(-1.3333333333333333) * Float32(Float32(uy * yi) * Float32(pi32 * pi_sq))))
                	quad = fma(uy, cubic, Float32(Float32(2.0) * Float32(pi32 * yi)))
                	return fma(uy, quad, xi)
                end
                
                \begin{array}{l}
                
                \\
                \begin{array}{l}
                \mathbf{if}\;2 \cdot uy \leq 0.0006000000284984708:\\
                \;\;\;\;\mathsf{fma}\left(\sqrt{\mathsf{fma}\left(maxCos \cdot maxCos, \left(ux \cdot ux\right) \cdot \left(\left(1 - ux\right) \cdot \left(ux + -1\right)\right), 1\right)}, \mathsf{fma}\left(2, uy \cdot \left(\pi \cdot yi\right), xi\right), maxCos \cdot \left(ux \cdot \left(\left(1 - ux\right) \cdot zi\right)\right)\right)\\
                
                \mathbf{else}:\\
                \;\;\;\;\mathsf{fma}\left(uy, \mathsf{fma}\left(uy, \mathsf{fma}\left(-2, xi \cdot \left(\pi \cdot \pi\right), -1.3333333333333333 \cdot \left(\left(uy \cdot yi\right) \cdot \left(\pi \cdot \left(\pi \cdot \pi\right)\right)\right)\right), 2 \cdot \left(\pi \cdot yi\right)\right), xi\right)\\
                
                
                \end{array}
                \end{array}
                
                Derivation
                1. Split input into 2 regimes
                2. if (*.f32 uy #s(literal 2 binary32)) < 6.00000028e-4

                  1. Initial program 99.4%

                    \[\left(\left(\cos \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot xi + \left(\sin \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot yi\right) + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
                  2. Add Preprocessing
                  3. Taylor expanded in zi around inf

                    \[\leadsto \color{blue}{maxCos \cdot \left(ux \cdot \left(zi \cdot \left(1 - ux\right)\right)\right)} \]
                  4. Step-by-step derivation
                    1. lower-*.f32: N/A

                      \[\leadsto \color{blue}{maxCos \cdot \left(ux \cdot \left(zi \cdot \left(1 - ux\right)\right)\right)} \]
                    2. associate-*r*: N/A

                      \[\leadsto maxCos \cdot \color{blue}{\left(\left(ux \cdot zi\right) \cdot \left(1 - ux\right)\right)} \]
                    3. lower-*.f32: N/A

                      \[\leadsto maxCos \cdot \color{blue}{\left(\left(ux \cdot zi\right) \cdot \left(1 - ux\right)\right)} \]
                    4. lower-*.f32: N/A

                      \[\leadsto maxCos \cdot \left(\color{blue}{\left(ux \cdot zi\right)} \cdot \left(1 - ux\right)\right) \]
                    5. lower--.f32: 15.8

                      \[\leadsto maxCos \cdot \left(\left(ux \cdot zi\right) \cdot \color{blue}{\left(1 - ux\right)}\right) \]
                  5. Applied rewrites: 15.8%

                    \[\leadsto \color{blue}{maxCos \cdot \left(\left(ux \cdot zi\right) \cdot \left(1 - ux\right)\right)} \]
                  6. Taylor expanded in uy around 0

                    \[\leadsto \color{blue}{2 \cdot \left(\left(uy \cdot \left(yi \cdot \mathsf{PI}\left(\right)\right)\right) \cdot \sqrt{1 - {maxCos}^{2} \cdot \left({ux}^{2} \cdot {\left(1 - ux\right)}^{2}\right)}\right) + \left(maxCos \cdot \left(ux \cdot \left(zi \cdot \left(1 - ux\right)\right)\right) + xi \cdot \sqrt{1 - {maxCos}^{2} \cdot \left({ux}^{2} \cdot {\left(1 - ux\right)}^{2}\right)}\right)} \]
                  7. Applied rewrites: 98.7%

                    \[\leadsto \color{blue}{\mathsf{fma}\left(\sqrt{\mathsf{fma}\left(maxCos \cdot maxCos, \left(ux \cdot ux\right) \cdot \left(-\left(1 - ux\right) \cdot \left(1 - ux\right)\right), 1\right)}, \mathsf{fma}\left(2, uy \cdot \left(yi \cdot \pi\right), xi\right), maxCos \cdot \left(ux \cdot \left(\left(1 - ux\right) \cdot zi\right)\right)\right)} \]

                  if 6.00000028e-4 < (*.f32 uy #s(literal 2 binary32))

                  1. Initial program 98.1%

                    \[\left(\left(\cos \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot xi + \left(\sin \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot yi\right) + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
                  2. Add Preprocessing
                  3. Taylor expanded in ux around 0

                    \[\leadsto \color{blue}{xi \cdot \cos \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right) + yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)} \]
                  4. Step-by-step derivation
                    1. lower-fma.f32: N/A

                      \[\leadsto \color{blue}{\mathsf{fma}\left(xi, \cos \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right), yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right)} \]
                    2. lower-cos.f32: N/A

                      \[\leadsto \mathsf{fma}\left(xi, \color{blue}{\cos \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)}, yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
                    3. associate-*r*: N/A

                      \[\leadsto \mathsf{fma}\left(xi, \cos \color{blue}{\left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right)}, yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
                    4. lower-*.f32: N/A

                      \[\leadsto \mathsf{fma}\left(xi, \cos \color{blue}{\left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right)}, yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
                    5. lower-*.f32: N/A

                      \[\leadsto \mathsf{fma}\left(xi, \cos \left(\color{blue}{\left(2 \cdot uy\right)} \cdot \mathsf{PI}\left(\right)\right), yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
                    6. lower-PI.f32N/A

                      \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \color{blue}{\mathsf{PI}\left(\right)}\right), yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
                    7. lower-*.f32N/A

                      \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), \color{blue}{yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)}\right) \]
                    8. lower-sin.f32N/A

                      \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), yi \cdot \color{blue}{\sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)}\right) \]
                    9. associate-*r*N/A

                      \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), yi \cdot \sin \color{blue}{\left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right)}\right) \]
                    10. lower-*.f32N/A

                      \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), yi \cdot \sin \color{blue}{\left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right)}\right) \]
                    11. lower-*.f32N/A

                      \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), yi \cdot \sin \left(\color{blue}{\left(2 \cdot uy\right)} \cdot \mathsf{PI}\left(\right)\right)\right) \]
                    12. lower-PI.f32: 91.0%

                      \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \pi\right), yi \cdot \sin \left(\left(2 \cdot uy\right) \cdot \color{blue}{\pi}\right)\right) \]
                  5. Applied rewrites: 91.0%

                    \[\leadsto \color{blue}{\mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \pi\right), yi \cdot \sin \left(\left(2 \cdot uy\right) \cdot \pi\right)\right)} \]
                  6. Taylor expanded in uy around 0

                    \[\leadsto xi + \color{blue}{uy \cdot \left(2 \cdot \left(yi \cdot \mathsf{PI}\left(\right)\right) + uy \cdot \left(-2 \cdot \left(xi \cdot {\mathsf{PI}\left(\right)}^{2}\right) + \frac{-4}{3} \cdot \left(uy \cdot \left(yi \cdot {\mathsf{PI}\left(\right)}^{3}\right)\right)\right)\right)} \]
                  7. Step-by-step derivation
                    1. Applied rewrites: 66.5%

                      \[\leadsto \mathsf{fma}\left(uy, \color{blue}{\mathsf{fma}\left(uy, \mathsf{fma}\left(-2, xi \cdot \left(\pi \cdot \pi\right), -1.3333333333333333 \cdot \left(\left(uy \cdot yi\right) \cdot \left(\pi \cdot \left(\pi \cdot \pi\right)\right)\right)\right), 2 \cdot \left(\pi \cdot yi\right)\right)}, xi\right) \]
                  8. Recombined 2 regimes into one program.
                  9. Final simplification: 87.3%

                    \[\leadsto \begin{array}{l} \mathbf{if}\;2 \cdot uy \leq 0.0006000000284984708:\\ \;\;\;\;\mathsf{fma}\left(\sqrt{\mathsf{fma}\left(maxCos \cdot maxCos, \left(ux \cdot ux\right) \cdot \left(\left(1 - ux\right) \cdot \left(ux + -1\right)\right), 1\right)}, \mathsf{fma}\left(2, uy \cdot \left(\pi \cdot yi\right), xi\right), maxCos \cdot \left(ux \cdot \left(\left(1 - ux\right) \cdot zi\right)\right)\right)\\ \mathbf{else}:\\ \;\;\;\;\mathsf{fma}\left(uy, \mathsf{fma}\left(uy, \mathsf{fma}\left(-2, xi \cdot \left(\pi \cdot \pi\right), -1.3333333333333333 \cdot \left(\left(uy \cdot yi\right) \cdot \left(\pi \cdot \left(\pi \cdot \pi\right)\right)\right)\right), 2 \cdot \left(\pi \cdot yi\right)\right), xi\right)\\ \end{array} \]
                  10. Add Preprocessing

                  Alternative 10: 89.2% accurate, 4.2× speedup?

                  \[\begin{array}{l} \\ \mathsf{fma}\left(uy, \mathsf{fma}\left(uy, \mathsf{fma}\left(-1.3333333333333333, \left(uy \cdot yi\right) \cdot \left(\pi \cdot \left(\pi \cdot \pi\right)\right), -2 \cdot \left(xi \cdot \left(\pi \cdot \pi\right)\right)\right), 2 \cdot \left(\pi \cdot yi\right)\right), xi\right) + zi \cdot \left(ux \cdot \left(\left(1 - ux\right) \cdot maxCos\right)\right) \end{array} \]
                  (FPCore (xi yi zi ux uy maxCos)
                   :precision binary32
                   ; Alternative 10: cubic polynomial in uy (Taylor expansion around uy = 0,
                   ; with the square-root factor dropped by expanding in maxCos around 0),
                   ; plus the unchanged zi * (ux * ((1 - ux) * maxCos)) term.
                   (let* ((pi_sq (* PI PI))
                          (cubic (* (* uy yi) (* PI pi_sq)))
                          (quad (* -2.0 (* xi pi_sq)))
                          (lin (* 2.0 (* PI yi)))
                          (poly (fma uy (fma uy (fma -1.3333333333333333 cubic quad) lin) xi))
                          (cone (* ux (* (- 1.0 ux) maxCos))))
                     (+ poly (* zi cone))))
                  float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
                      /* Alternative 10: cubic polynomial in uy evaluated with nested fmaf,
                         plus the zi * (ux * ((1 - ux) * maxCos)) term. */
                      const float pi = (float) M_PI;
                      const float pi_sq = pi * pi;
                      const float cubic = (uy * yi) * (pi * pi_sq);
                      const float quad = -2.0f * (xi * pi_sq);
                      const float lin = 2.0f * (pi * yi);
                      const float poly = fmaf(uy, fmaf(uy, fmaf(-1.3333333333333333f, cubic, quad), lin), xi);
                      const float cone = ux * ((1.0f - ux) * maxCos);
                      /* The product stays inside the final sum expression, as in the original. */
                      return poly + (zi * cone);
                  }
                  
                  function code(xi, yi, zi, ux, uy, maxCos)
                      # Alternative 10: cubic polynomial in uy evaluated with nested fma,
                      # plus the zi * (ux * ((1 - ux) * maxCos)) term. Every intermediate is
                      # rounded to Float32 at the same points as the one-line form.
                      pi32 = Float32(pi)
                      pi_sq = Float32(pi32 * pi32)
                      cubic = Float32(Float32(uy * yi) * Float32(pi32 * pi_sq))
                      quad = Float32(Float32(-2.0) * Float32(xi * pi_sq))
                      lin = Float32(Float32(2.0) * Float32(pi32 * yi))
                      poly = fma(uy, fma(uy, fma(Float32(-1.3333333333333333), cubic, quad), lin), xi)
                      cone = Float32(ux * Float32(Float32(Float32(1.0) - ux) * maxCos))
                      return Float32(poly + Float32(zi * cone))
                  end
                  
                  \begin{array}{l}
                  
                  \\
                  \mathsf{fma}\left(uy, \mathsf{fma}\left(uy, \mathsf{fma}\left(-1.3333333333333333, \left(uy \cdot yi\right) \cdot \left(\pi \cdot \left(\pi \cdot \pi\right)\right), -2 \cdot \left(xi \cdot \left(\pi \cdot \pi\right)\right)\right), 2 \cdot \left(\pi \cdot yi\right)\right), xi\right) + zi \cdot \left(ux \cdot \left(\left(1 - ux\right) \cdot maxCos\right)\right)
                  \end{array}
                  
                  Derivation
                  1. Initial program 99.0%

                    \[\left(\left(\cos \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot xi + \left(\sin \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot yi\right) + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
                  2. Add Preprocessing
                  3. Taylor expanded in uy around 0

                    \[\leadsto \color{blue}{\left(uy \cdot \left(2 \cdot \left(\left(yi \cdot \mathsf{PI}\left(\right)\right) \cdot \sqrt{1 - {maxCos}^{2} \cdot \left({ux}^{2} \cdot {\left(1 - ux\right)}^{2}\right)}\right) + uy \cdot \left(-2 \cdot \left(\left(xi \cdot {\mathsf{PI}\left(\right)}^{2}\right) \cdot \sqrt{1 - {maxCos}^{2} \cdot \left({ux}^{2} \cdot {\left(1 - ux\right)}^{2}\right)}\right) + \frac{-4}{3} \cdot \left(\left(uy \cdot \left(yi \cdot {\mathsf{PI}\left(\right)}^{3}\right)\right) \cdot \sqrt{1 - {maxCos}^{2} \cdot \left({ux}^{2} \cdot {\left(1 - ux\right)}^{2}\right)}\right)\right)\right) + xi \cdot \sqrt{1 - {maxCos}^{2} \cdot \left({ux}^{2} \cdot {\left(1 - ux\right)}^{2}\right)}\right)} + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
                  4. Applied rewrites: 89.8%

                    \[\leadsto \color{blue}{\mathsf{fma}\left(xi, \sqrt{\mathsf{fma}\left(maxCos \cdot maxCos, \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right) \cdot \left(ux \cdot \left(-ux\right)\right), 1\right)}, uy \cdot \mathsf{fma}\left(uy, \sqrt{\mathsf{fma}\left(maxCos \cdot maxCos, \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right) \cdot \left(ux \cdot \left(-ux\right)\right), 1\right)} \cdot \mathsf{fma}\left(-1.3333333333333333, uy \cdot \left(yi \cdot \left(\pi \cdot \left(\pi \cdot \pi\right)\right)\right), -2 \cdot \left(xi \cdot \left(\pi \cdot \pi\right)\right)\right), \left(2 \cdot \left(yi \cdot \pi\right)\right) \cdot \sqrt{\mathsf{fma}\left(maxCos \cdot maxCos, \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right) \cdot \left(ux \cdot \left(-ux\right)\right), 1\right)}\right)\right)} + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
                  5. Taylor expanded in maxCos around 0

                    \[\leadsto \left(xi + \color{blue}{uy \cdot \left(2 \cdot \left(yi \cdot \mathsf{PI}\left(\right)\right) + uy \cdot \left(-2 \cdot \left(xi \cdot {\mathsf{PI}\left(\right)}^{2}\right) + \frac{-4}{3} \cdot \left(uy \cdot \left(yi \cdot {\mathsf{PI}\left(\right)}^{3}\right)\right)\right)\right)}\right) + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
                  6. Step-by-step derivation
                    1. Applied rewrites: 89.8%

                      \[\leadsto \mathsf{fma}\left(uy, \color{blue}{\mathsf{fma}\left(uy, \mathsf{fma}\left(-1.3333333333333333, \left(uy \cdot yi\right) \cdot \left(\pi \cdot \left(\pi \cdot \pi\right)\right), -2 \cdot \left(xi \cdot \left(\pi \cdot \pi\right)\right)\right), 2 \cdot \left(yi \cdot \pi\right)\right)}, xi\right) + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
                    2. Final simplification: 89.8%

                      \[\leadsto \mathsf{fma}\left(uy, \mathsf{fma}\left(uy, \mathsf{fma}\left(-1.3333333333333333, \left(uy \cdot yi\right) \cdot \left(\pi \cdot \left(\pi \cdot \pi\right)\right), -2 \cdot \left(xi \cdot \left(\pi \cdot \pi\right)\right)\right), 2 \cdot \left(\pi \cdot yi\right)\right), xi\right) + zi \cdot \left(ux \cdot \left(\left(1 - ux\right) \cdot maxCos\right)\right) \]
                    3. Add Preprocessing

                    Alternative 11: 81.4% accurate, 5.5× speedup?

                    \[\begin{array}{l} \\ \mathsf{fma}\left(uy, \mathsf{fma}\left(uy, \mathsf{fma}\left(-2, xi \cdot \left(\pi \cdot \pi\right), -1.3333333333333333 \cdot \left(\left(uy \cdot yi\right) \cdot \left(\pi \cdot \left(\pi \cdot \pi\right)\right)\right)\right), 2 \cdot \left(\pi \cdot yi\right)\right), xi\right) \end{array} \]
                    (FPCore (xi yi zi ux uy maxCos)
                     :precision binary32
                     ; Alternative 11: cubic polynomial in uy obtained by expanding in ux
                     ; around 0 and then in uy around 0; zi, ux and maxCos are unused.
                     (let* ((pi_sq (* PI PI))
                            (quad (* xi pi_sq))
                            (cubic (* -1.3333333333333333 (* (* uy yi) (* PI pi_sq))))
                            (lin (* 2.0 (* PI yi))))
                       (fma uy (fma uy (fma -2.0 quad cubic) lin) xi)))
                    float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
                        /* Alternative 11: cubic polynomial in uy evaluated with nested fmaf.
                           zi, ux and maxCos are accepted but not used. */
                        const float pi = (float) M_PI;
                        const float pi_sq = pi * pi;
                        const float quad = xi * pi_sq;
                        const float cubic = -1.3333333333333333f * ((uy * yi) * (pi * pi_sq));
                        const float lin = 2.0f * (pi * yi);
                        return fmaf(uy, fmaf(uy, fmaf(-2.0f, quad, cubic), lin), xi);
                    }
                    
                    function code(xi, yi, zi, ux, uy, maxCos)
                        # Alternative 11: cubic polynomial in uy evaluated with nested fma.
                        # zi, ux and maxCos are accepted but not used.
                        pi32 = Float32(pi)
                        pi_sq = Float32(pi32 * pi32)
                        quad = Float32(xi * pi_sq)
                        cubic = Float32(Float32(-1.3333333333333333) * Float32(Float32(uy * yi) * Float32(pi32 * pi_sq)))
                        lin = Float32(Float32(2.0) * Float32(pi32 * yi))
                        return fma(uy, fma(uy, fma(Float32(-2.0), quad, cubic), lin), xi)
                    end
                    
                    \begin{array}{l}
                    
                    \\
                    \mathsf{fma}\left(uy, \mathsf{fma}\left(uy, \mathsf{fma}\left(-2, xi \cdot \left(\pi \cdot \pi\right), -1.3333333333333333 \cdot \left(\left(uy \cdot yi\right) \cdot \left(\pi \cdot \left(\pi \cdot \pi\right)\right)\right)\right), 2 \cdot \left(\pi \cdot yi\right)\right), xi\right)
                    \end{array}
                    
                    Derivation
                    1. Initial program 99.0%

                      \[\left(\left(\cos \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot xi + \left(\sin \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot yi\right) + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
                    2. Add Preprocessing
                    3. Taylor expanded in ux around 0

                      \[\leadsto \color{blue}{xi \cdot \cos \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right) + yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)} \]
                    4. Step-by-step derivation
                      1. lower-fma.f32N/A

                        \[\leadsto \color{blue}{\mathsf{fma}\left(xi, \cos \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right), yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right)} \]
                      2. lower-cos.f32N/A

                        \[\leadsto \mathsf{fma}\left(xi, \color{blue}{\cos \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)}, yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
                      3. associate-*r*N/A

                        \[\leadsto \mathsf{fma}\left(xi, \cos \color{blue}{\left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right)}, yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
                      4. lower-*.f32N/A

                        \[\leadsto \mathsf{fma}\left(xi, \cos \color{blue}{\left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right)}, yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
                      5. lower-*.f32N/A

                        \[\leadsto \mathsf{fma}\left(xi, \cos \left(\color{blue}{\left(2 \cdot uy\right)} \cdot \mathsf{PI}\left(\right)\right), yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
                      6. lower-PI.f32N/A

                        \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \color{blue}{\mathsf{PI}\left(\right)}\right), yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
                      7. lower-*.f32N/A

                        \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), \color{blue}{yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)}\right) \]
                      8. lower-sin.f32N/A

                        \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), yi \cdot \color{blue}{\sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)}\right) \]
                      9. associate-*r*N/A

                        \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), yi \cdot \sin \color{blue}{\left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right)}\right) \]
                      10. lower-*.f32N/A

                        \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), yi \cdot \sin \color{blue}{\left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right)}\right) \]
                      11. lower-*.f32N/A

                        \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), yi \cdot \sin \left(\color{blue}{\left(2 \cdot uy\right)} \cdot \mathsf{PI}\left(\right)\right)\right) \]
                      12. lower-PI.f32: 88.8%

                        \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \pi\right), yi \cdot \sin \left(\left(2 \cdot uy\right) \cdot \color{blue}{\pi}\right)\right) \]
                    5. Applied rewrites: 88.8%

                      \[\leadsto \color{blue}{\mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \pi\right), yi \cdot \sin \left(\left(2 \cdot uy\right) \cdot \pi\right)\right)} \]
                    6. Taylor expanded in uy around 0

                      \[\leadsto xi + \color{blue}{uy \cdot \left(2 \cdot \left(yi \cdot \mathsf{PI}\left(\right)\right) + uy \cdot \left(-2 \cdot \left(xi \cdot {\mathsf{PI}\left(\right)}^{2}\right) + \frac{-4}{3} \cdot \left(uy \cdot \left(yi \cdot {\mathsf{PI}\left(\right)}^{3}\right)\right)\right)\right)} \]
                    7. Step-by-step derivation
                      1. Applied rewrites: 80.2%

                        \[\leadsto \mathsf{fma}\left(uy, \color{blue}{\mathsf{fma}\left(uy, \mathsf{fma}\left(-2, xi \cdot \left(\pi \cdot \pi\right), -1.3333333333333333 \cdot \left(\left(uy \cdot yi\right) \cdot \left(\pi \cdot \left(\pi \cdot \pi\right)\right)\right)\right), 2 \cdot \left(\pi \cdot yi\right)\right)}, xi\right) \]
                      2. Add Preprocessing

                      Alternative 12: 78.0% accurate, 9.3× speedup?

                      \[\begin{array}{l} \\ \mathsf{fma}\left(uy, \mathsf{fma}\left(uy \cdot -2, xi \cdot \left(\pi \cdot \pi\right), 2 \cdot \left(\pi \cdot yi\right)\right), xi\right) \end{array} \]
                      (FPCore (xi yi zi ux uy maxCos)
                       :precision binary32
                       ; Alternative 12: quadratic polynomial in uy (expansion in ux around 0,
                       ; then in uy around 0); zi, ux and maxCos are unused.
                       (let* ((curvature (* xi (* PI PI)))
                              (slope (* 2.0 (* PI yi))))
                         (fma uy (fma (* uy -2.0) curvature slope) xi)))
                      float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
                          /* Alternative 12: quadratic polynomial in uy evaluated with nested fmaf.
                             zi, ux and maxCos are accepted but not used. */
                          const float pi = (float) M_PI;
                          const float curvature = xi * (pi * pi);
                          const float slope = 2.0f * (pi * yi);
                          return fmaf(uy, fmaf(uy * -2.0f, curvature, slope), xi);
                      }
                      
                      function code(xi, yi, zi, ux, uy, maxCos)
                          # Alternative 12: quadratic polynomial in uy evaluated with nested fma.
                          # zi, ux and maxCos are accepted but not used.
                          pi32 = Float32(pi)
                          curvature = Float32(xi * Float32(pi32 * pi32))
                          slope = Float32(Float32(2.0) * Float32(pi32 * yi))
                          return fma(uy, fma(Float32(uy * Float32(-2.0)), curvature, slope), xi)
                      end
                      
                      \begin{array}{l}
                      
                      \\
                      \mathsf{fma}\left(uy, \mathsf{fma}\left(uy \cdot -2, xi \cdot \left(\pi \cdot \pi\right), 2 \cdot \left(\pi \cdot yi\right)\right), xi\right)
                      \end{array}
                      
                      Derivation
                      1. Initial program 99.0%

                        \[\left(\left(\cos \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot xi + \left(\sin \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot yi\right) + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
                      2. Add Preprocessing
                      3. Taylor expanded in ux around 0

                        \[\leadsto \color{blue}{xi \cdot \cos \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right) + yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)} \]
                      4. Step-by-step derivation
                        1. lower-fma.f32N/A

                          \[\leadsto \color{blue}{\mathsf{fma}\left(xi, \cos \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right), yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right)} \]
                        2. lower-cos.f32N/A

                          \[\leadsto \mathsf{fma}\left(xi, \color{blue}{\cos \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)}, yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
                        3. associate-*r*N/A

                          \[\leadsto \mathsf{fma}\left(xi, \cos \color{blue}{\left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right)}, yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
                        4. lower-*.f32N/A

                          \[\leadsto \mathsf{fma}\left(xi, \cos \color{blue}{\left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right)}, yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
                        5. lower-*.f32N/A

                          \[\leadsto \mathsf{fma}\left(xi, \cos \left(\color{blue}{\left(2 \cdot uy\right)} \cdot \mathsf{PI}\left(\right)\right), yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
                        6. lower-PI.f32N/A

                          \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \color{blue}{\mathsf{PI}\left(\right)}\right), yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
                        7. lower-*.f32N/A

                          \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), \color{blue}{yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)}\right) \]
                        8. lower-sin.f32N/A

                          \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), yi \cdot \color{blue}{\sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)}\right) \]
                        9. associate-*r*N/A

                          \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), yi \cdot \sin \color{blue}{\left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right)}\right) \]
                        10. lower-*.f32N/A

                          \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), yi \cdot \sin \color{blue}{\left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right)}\right) \]
                        11. lower-*.f32N/A

                          \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), yi \cdot \sin \left(\color{blue}{\left(2 \cdot uy\right)} \cdot \mathsf{PI}\left(\right)\right)\right) \]
                        12. lower-PI.f32: 88.8%

                          \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \pi\right), yi \cdot \sin \left(\left(2 \cdot uy\right) \cdot \color{blue}{\pi}\right)\right) \]
                      5. Applied rewrites: 88.8%

                        \[\leadsto \color{blue}{\mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \pi\right), yi \cdot \sin \left(\left(2 \cdot uy\right) \cdot \pi\right)\right)} \]
                      6. Taylor expanded in uy around 0

                        \[\leadsto xi + \color{blue}{uy \cdot \left(-2 \cdot \left(uy \cdot \left(xi \cdot {\mathsf{PI}\left(\right)}^{2}\right)\right) + 2 \cdot \left(yi \cdot \mathsf{PI}\left(\right)\right)\right)} \]
                      7. Step-by-step derivation
                        1. Applied rewrites: 77.3%

                          \[\leadsto \mathsf{fma}\left(uy, \color{blue}{\mathsf{fma}\left(-2 \cdot uy, xi \cdot \left(\pi \cdot \pi\right), 2 \cdot \left(\pi \cdot yi\right)\right)}, xi\right) \]
                        2. Final simplification: 77.3%

                          \[\leadsto \mathsf{fma}\left(uy, \mathsf{fma}\left(uy \cdot -2, xi \cdot \left(\pi \cdot \pi\right), 2 \cdot \left(\pi \cdot yi\right)\right), xi\right) \]
                        3. Add Preprocessing

                        Alternative 13: 74.1% accurate, 20.8× speedup?

                        \[\begin{array}{l} \\ \mathsf{fma}\left(2, uy \cdot \left(\pi \cdot yi\right), xi\right) \end{array} \]
                        (FPCore (xi yi zi ux uy maxCos)
                         :precision binary32
                         ; Alternative 13: first-order expansion in uy, xi + 2 * (uy * (PI * yi));
                         ; zi, ux and maxCos are unused.
                         (let ((linear (* uy (* PI yi))))
                           (fma 2.0 linear xi)))
                        float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
                            /* Alternative 13: first-order expansion in uy, computed as a single
                               fmaf. zi, ux and maxCos are accepted but not used. */
                            const float pi = (float) M_PI;
                            const float linear = uy * (pi * yi);
                            return fmaf(2.0f, linear, xi);
                        }
                        
                        function code(xi, yi, zi, ux, uy, maxCos)
                            # Alternative 13: first-order expansion in uy, computed as a single fma.
                            # zi, ux and maxCos are accepted but not used.
                            pi32 = Float32(pi)
                            linear = Float32(uy * Float32(pi32 * yi))
                            return fma(Float32(2.0), linear, xi)
                        end
                        
                        \begin{array}{l}
                        
                        \\
                        \mathsf{fma}\left(2, uy \cdot \left(\pi \cdot yi\right), xi\right)
                        \end{array}
                        
                        Derivation
                        1. Initial program 99.0%

                          \[\left(\left(\cos \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot xi + \left(\sin \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot yi\right) + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
                        2. Add Preprocessing
                        3. Taylor expanded in ux around 0

                          \[\leadsto \color{blue}{xi \cdot \cos \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right) + yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)} \]
                        4. Step-by-step derivation
                          1. lower-fma.f32N/A

                            \[\leadsto \color{blue}{\mathsf{fma}\left(xi, \cos \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right), yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right)} \]
                          2. lower-cos.f32N/A

                            \[\leadsto \mathsf{fma}\left(xi, \color{blue}{\cos \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)}, yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
                          3. associate-*r*N/A

                            \[\leadsto \mathsf{fma}\left(xi, \cos \color{blue}{\left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right)}, yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
                          4. lower-*.f32N/A

                            \[\leadsto \mathsf{fma}\left(xi, \cos \color{blue}{\left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right)}, yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
                          5. lower-*.f32N/A

                            \[\leadsto \mathsf{fma}\left(xi, \cos \left(\color{blue}{\left(2 \cdot uy\right)} \cdot \mathsf{PI}\left(\right)\right), yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
                          6. lower-PI.f32N/A

                            \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \color{blue}{\mathsf{PI}\left(\right)}\right), yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \]
                          7. lower-*.f32N/A

                            \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), \color{blue}{yi \cdot \sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)}\right) \]
                          8. lower-sin.f32N/A

                            \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), yi \cdot \color{blue}{\sin \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)}\right) \]
                          9. associate-*r*N/A

                            \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), yi \cdot \sin \color{blue}{\left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right)}\right) \]
                          10. lower-*.f32N/A

                            \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), yi \cdot \sin \color{blue}{\left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right)}\right) \]
                          11. lower-*.f32N/A

                            \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \mathsf{PI}\left(\right)\right), yi \cdot \sin \left(\color{blue}{\left(2 \cdot uy\right)} \cdot \mathsf{PI}\left(\right)\right)\right) \]
                          12. lower-PI.f32: 88.8%

                            \[\leadsto \mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \pi\right), yi \cdot \sin \left(\left(2 \cdot uy\right) \cdot \color{blue}{\pi}\right)\right) \]
                        5. Applied rewrites: 88.8%

                          \[\leadsto \color{blue}{\mathsf{fma}\left(xi, \cos \left(\left(2 \cdot uy\right) \cdot \pi\right), yi \cdot \sin \left(\left(2 \cdot uy\right) \cdot \pi\right)\right)} \]
                        6. Taylor expanded in uy around 0

                          \[\leadsto xi + \color{blue}{2 \cdot \left(uy \cdot \left(yi \cdot \mathsf{PI}\left(\right)\right)\right)} \]
                        7. Step-by-step derivation
                          1. Applied rewrites: 73.2%

                            \[\leadsto \mathsf{fma}\left(2, \color{blue}{uy \cdot \left(\pi \cdot yi\right)}, xi\right) \]
                          2. Add Preprocessing

                          Alternative 14: 45.0% accurate, 58.8× speedup?

                          \[\begin{array}{l} \\ xi \cdot 1 \end{array} \]
                          (FPCore (xi yi zi ux uy maxCos)
                           :precision binary32
                           ; Alternative 14: the result is xi multiplied by the constant 1;
                           ; every other argument is unused.
                           (* 1.0 xi))
                          float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
                              /* Alternative 14: returns xi multiplied by 1; all other
                                 arguments are accepted but not used. */
                              const float unit = 1.0f;
                              return xi * unit;
                          }
                          
                          function code(xi, yi, zi, ux, uy, maxcos) result(res)
                              ! Alternative 14: returns xi multiplied by 1; all other
                              ! arguments are accepted but not used.
                              real(4), intent (in) :: xi, yi, zi
                              real(4), intent (in) :: ux, uy, maxcos
                              real(4) :: res
                              res = xi * 1.0e0
                          end function
                          
                          function code(xi, yi, zi, ux, uy, maxCos)
                              # Alternative 14: returns xi multiplied by 1, rounded to Float32;
                              # all other arguments are accepted but not used.
                              unit = Float32(1.0)
                              scaled = xi * unit
                              return Float32(scaled)
                          end
                          
                          function tmp = code(xi, yi, zi, ux, uy, maxCos)
                              % Alternative 14: returns xi multiplied by single-precision 1;
                              % all other arguments are accepted but not used.
                              unit = single(1.0);
                              tmp = xi * unit;
                          end
                          
                          \begin{array}{l}
                          
                          \\
                          xi \cdot 1
                          \end{array}
                          
                          Derivation
                          1. Initial program 99.0%

                            \[\left(\left(\cos \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot xi + \left(\sin \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot yi\right) + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
                          2. Add Preprocessing
                          3. Taylor expanded in yi around 0

                            \[\leadsto \color{blue}{maxCos \cdot \left(ux \cdot \left(zi \cdot \left(1 - ux\right)\right)\right) + \left(xi \cdot \cos \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \cdot \sqrt{1 - {maxCos}^{2} \cdot \left({ux}^{2} \cdot {\left(1 - ux\right)}^{2}\right)}} \]
                          4. Step-by-step derivation
                            1. +-commutative N/A

                              \[\leadsto \color{blue}{\left(xi \cdot \cos \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)\right) \cdot \sqrt{1 - {maxCos}^{2} \cdot \left({ux}^{2} \cdot {\left(1 - ux\right)}^{2}\right)} + maxCos \cdot \left(ux \cdot \left(zi \cdot \left(1 - ux\right)\right)\right)} \]
                            2. associate-*l* N/A

                              \[\leadsto \color{blue}{xi \cdot \left(\cos \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right) \cdot \sqrt{1 - {maxCos}^{2} \cdot \left({ux}^{2} \cdot {\left(1 - ux\right)}^{2}\right)}\right)} + maxCos \cdot \left(ux \cdot \left(zi \cdot \left(1 - ux\right)\right)\right) \]
                            3. lower-fma.f32 N/A

                              \[\leadsto \color{blue}{\mathsf{fma}\left(xi, \cos \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right) \cdot \sqrt{1 - {maxCos}^{2} \cdot \left({ux}^{2} \cdot {\left(1 - ux\right)}^{2}\right)}, maxCos \cdot \left(ux \cdot \left(zi \cdot \left(1 - ux\right)\right)\right)\right)} \]
                          5. Applied rewrites 61.9%

                            \[\leadsto \color{blue}{\mathsf{fma}\left(xi, \sqrt{\mathsf{fma}\left(maxCos \cdot maxCos, \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right) \cdot \left(ux \cdot \left(-ux\right)\right), 1\right)} \cdot \cos \left(\left(2 \cdot uy\right) \cdot \pi\right), maxCos \cdot \left(\left(ux \cdot zi\right) \cdot \left(1 - ux\right)\right)\right)} \]
                          6. Taylor expanded in maxCos around 0

                            \[\leadsto xi \cdot \color{blue}{\cos \left(2 \cdot \left(uy \cdot \mathsf{PI}\left(\right)\right)\right)} \]
                          7. Step-by-step derivation
                            1. Applied rewrites 53.8%

                              \[\leadsto xi \cdot \color{blue}{\cos \left(2 \cdot \left(uy \cdot \pi\right)\right)} \]
                            2. Taylor expanded in uy around 0

                              \[\leadsto xi \cdot 1 \]
                            3. Step-by-step derivation
                              1. Applied rewrites 46.7%

                                \[\leadsto xi \cdot 1 \]
                              2. Add Preprocessing

                              Reproduce

                              ?
                              herbie shell --seed 2024221 
                              (FPCore (xi yi zi ux uy maxCos)
                                ; Benchmark label for this input program.
                                :name "UniformSampleCone 2"
                                ; Every operation is evaluated in the binary32 format.
                                :precision binary32
                                ; Input domain: xi, yi, zi in [-10000, 10000]; ux, uy in [2.328306437e-10, 1]; maxCos in [0, 1].
                                :pre (and (and (and (and (and (and (<= -10000.0 xi) (<= xi 10000.0)) (and (<= -10000.0 yi) (<= yi 10000.0))) (and (<= -10000.0 zi) (<= zi 10000.0))) (and (<= 2.328306437e-10 ux) (<= ux 1.0))) (and (<= 2.328306437e-10 uy) (<= uy 1.0))) (and (<= 0.0 maxCos) (<= maxCos 1.0)))
                                ; Body: (cos(t2)*t1)*xi + (sin(t2)*t1)*yi + t0*zi, where t0 = ((1 - ux)*maxCos)*ux, t1 = sqrt(1 - t0*t0), t2 = (uy*2)*PI.
                                (+ (+ (* (* (cos (* (* uy 2.0) PI)) (sqrt (- 1.0 (* (* (* (- 1.0 ux) maxCos) ux) (* (* (- 1.0 ux) maxCos) ux))))) xi) (* (* (sin (* (* uy 2.0) PI)) (sqrt (- 1.0 (* (* (* (- 1.0 ux) maxCos) ux) (* (* (- 1.0 ux) maxCos) ux))))) yi)) (* (* (* (- 1.0 ux) maxCos) ux) zi)))