UniformSampleCone 2

Percentage Accurate: 98.9% → 98.9%
Time: 23.5s
Alternatives: 9
Speedup: 1.0×

Specification

?
\[\left(\left(\left(\left(\left(-10000 \leq xi \land xi \leq 10000\right) \land \left(-10000 \leq yi \land yi \leq 10000\right)\right) \land \left(-10000 \leq zi \land zi \leq 10000\right)\right) \land \left(2.328306437 \cdot 10^{-10} \leq ux \land ux \leq 1\right)\right) \land \left(2.328306437 \cdot 10^{-10} \leq uy \land uy \leq 1\right)\right) \land \left(0 \leq maxCos \land maxCos \leq 1\right)\]
\[\begin{array}{l} \\ \begin{array}{l} t_0 := \left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\\ t_1 := \sqrt{1 - t_0 \cdot t_0}\\ t_2 := \left(uy \cdot 2\right) \cdot \pi\\ \left(\left(\cos t_2 \cdot t_1\right) \cdot xi + \left(\sin t_2 \cdot t_1\right) \cdot yi\right) + t_0 \cdot zi \end{array} \end{array} \]
(FPCore (xi yi zi ux uy maxCos)
 :precision binary32
 (let* ((t_0 (* (* (- 1.0 ux) maxCos) ux))
        (t_1 (sqrt (- 1.0 (* t_0 t_0))))
        (t_2 (* (* uy 2.0) PI)))
   (+ (+ (* (* (cos t_2) t_1) xi) (* (* (sin t_2) t_1) yi)) (* t_0 zi))))
float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
	float t_0 = ((1.0f - ux) * maxCos) * ux;
	float t_1 = sqrtf((1.0f - (t_0 * t_0)));
	float t_2 = (uy * 2.0f) * ((float) M_PI);
	return (((cosf(t_2) * t_1) * xi) + ((sinf(t_2) * t_1) * yi)) + (t_0 * zi);
}
function code(xi, yi, zi, ux, uy, maxCos)
	t_0 = Float32(Float32(Float32(Float32(1.0) - ux) * maxCos) * ux)
	t_1 = sqrt(Float32(Float32(1.0) - Float32(t_0 * t_0)))
	t_2 = Float32(Float32(uy * Float32(2.0)) * Float32(pi))
	return Float32(Float32(Float32(Float32(cos(t_2) * t_1) * xi) + Float32(Float32(sin(t_2) * t_1) * yi)) + Float32(t_0 * zi))
end
function tmp = code(xi, yi, zi, ux, uy, maxCos)
	t_0 = ((single(1.0) - ux) * maxCos) * ux;
	t_1 = sqrt((single(1.0) - (t_0 * t_0)));
	t_2 = (uy * single(2.0)) * single(pi);
	tmp = (((cos(t_2) * t_1) * xi) + ((sin(t_2) * t_1) * yi)) + (t_0 * zi);
end
\begin{array}{l}

\\
\begin{array}{l}
t_0 := \left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\\
t_1 := \sqrt{1 - t_0 \cdot t_0}\\
t_2 := \left(uy \cdot 2\right) \cdot \pi\\
\left(\left(\cos t_2 \cdot t_1\right) \cdot xi + \left(\sin t_2 \cdot t_1\right) \cdot yi\right) + t_0 \cdot zi
\end{array}
\end{array}

Sampling outcomes in binary32 precision:

Local Percentage Accuracy vs ?

The average percentage accuracy by input value. The horizontal axis shows the value of an input variable; the variable is chosen in the title. The vertical axis is accuracy; higher is better. Red represents the original program, while blue represents Herbie's suggestion. These can be toggled with the buttons below the plot. The line is an average, while dots represent individual samples.

Accuracy vs Speed?

Herbie found 9 alternatives:

Alternative | Accuracy | Speedup
The accuracy (vertical axis) and speed (horizontal axis) of each alternative. Up and to the right is better. The red square shows the initial program, and each blue circle shows an alternative. The line shows the best available speed-accuracy tradeoffs.

Initial Program: 98.9% accurate, 1.0× speedup?

\[\begin{array}{l} \\ \begin{array}{l} t_0 := \left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\\ t_1 := \sqrt{1 - t_0 \cdot t_0}\\ t_2 := \left(uy \cdot 2\right) \cdot \pi\\ \left(\left(\cos t_2 \cdot t_1\right) \cdot xi + \left(\sin t_2 \cdot t_1\right) \cdot yi\right) + t_0 \cdot zi \end{array} \end{array} \]
(FPCore (xi yi zi ux uy maxCos)
 :precision binary32
 (let* ((t_0 (* (* (- 1.0 ux) maxCos) ux))
        (t_1 (sqrt (- 1.0 (* t_0 t_0))))
        (t_2 (* (* uy 2.0) PI)))
   (+ (+ (* (* (cos t_2) t_1) xi) (* (* (sin t_2) t_1) yi)) (* t_0 zi))))
float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
	float t_0 = ((1.0f - ux) * maxCos) * ux;
	float t_1 = sqrtf((1.0f - (t_0 * t_0)));
	float t_2 = (uy * 2.0f) * ((float) M_PI);
	return (((cosf(t_2) * t_1) * xi) + ((sinf(t_2) * t_1) * yi)) + (t_0 * zi);
}
function code(xi, yi, zi, ux, uy, maxCos)
	t_0 = Float32(Float32(Float32(Float32(1.0) - ux) * maxCos) * ux)
	t_1 = sqrt(Float32(Float32(1.0) - Float32(t_0 * t_0)))
	t_2 = Float32(Float32(uy * Float32(2.0)) * Float32(pi))
	return Float32(Float32(Float32(Float32(cos(t_2) * t_1) * xi) + Float32(Float32(sin(t_2) * t_1) * yi)) + Float32(t_0 * zi))
end
function tmp = code(xi, yi, zi, ux, uy, maxCos)
	t_0 = ((single(1.0) - ux) * maxCos) * ux;
	t_1 = sqrt((single(1.0) - (t_0 * t_0)));
	t_2 = (uy * single(2.0)) * single(pi);
	tmp = (((cos(t_2) * t_1) * xi) + ((sin(t_2) * t_1) * yi)) + (t_0 * zi);
end
\begin{array}{l}

\\
\begin{array}{l}
t_0 := \left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\\
t_1 := \sqrt{1 - t_0 \cdot t_0}\\
t_2 := \left(uy \cdot 2\right) \cdot \pi\\
\left(\left(\cos t_2 \cdot t_1\right) \cdot xi + \left(\sin t_2 \cdot t_1\right) \cdot yi\right) + t_0 \cdot zi
\end{array}
\end{array}

Alternative 1: 98.9% accurate, 0.8× speedup?

\[\begin{array}{l} \\ \mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot \left(maxCos \cdot \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right)\right)\right)\right)} \cdot \left(\cos \left(\mathsf{log1p}\left(\mathsf{expm1}\left(\pi \cdot \left(uy \cdot 2\right)\right)\right)\right) \cdot xi + \sin \left(uy \cdot \left(\pi \cdot 2\right)\right) \cdot yi\right)\right) \end{array} \]
(FPCore (xi yi zi ux uy maxCos)
 :precision binary32
 (fma
  ux
  (* (- 1.0 ux) (* maxCos zi))
  (*
   (sqrt (- 1.0 (* ux (* ux (* maxCos (* maxCos (* (- 1.0 ux) (- 1.0 ux))))))))
   (+
    (* (cos (log1p (expm1 (* PI (* uy 2.0))))) xi)
    (* (sin (* uy (* PI 2.0))) yi)))))
float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
	return fmaf(ux, ((1.0f - ux) * (maxCos * zi)), (sqrtf((1.0f - (ux * (ux * (maxCos * (maxCos * ((1.0f - ux) * (1.0f - ux)))))))) * ((cosf(log1pf(expm1f((((float) M_PI) * (uy * 2.0f))))) * xi) + (sinf((uy * (((float) M_PI) * 2.0f))) * yi))));
}
function code(xi, yi, zi, ux, uy, maxCos)
	return fma(ux, Float32(Float32(Float32(1.0) - ux) * Float32(maxCos * zi)), Float32(sqrt(Float32(Float32(1.0) - Float32(ux * Float32(ux * Float32(maxCos * Float32(maxCos * Float32(Float32(Float32(1.0) - ux) * Float32(Float32(1.0) - ux)))))))) * Float32(Float32(cos(log1p(expm1(Float32(Float32(pi) * Float32(uy * Float32(2.0)))))) * xi) + Float32(sin(Float32(uy * Float32(Float32(pi) * Float32(2.0)))) * yi))))
end
\begin{array}{l}

\\
\mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot \left(maxCos \cdot \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right)\right)\right)\right)} \cdot \left(\cos \left(\mathsf{log1p}\left(\mathsf{expm1}\left(\pi \cdot \left(uy \cdot 2\right)\right)\right)\right) \cdot xi + \sin \left(uy \cdot \left(\pi \cdot 2\right)\right) \cdot yi\right)\right)
\end{array}
Derivation
  1. Initial program 99.0%

    \[\left(\left(\cos \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot xi + \left(\sin \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot yi\right) + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
  2. Simplified 99.1%

    \[\leadsto \color{blue}{\mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot \left(maxCos \cdot \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right)\right)\right)\right)} \cdot \left(\cos \left(uy \cdot \left(2 \cdot \pi\right)\right) \cdot xi + \sin \left(uy \cdot \left(2 \cdot \pi\right)\right) \cdot yi\right)\right)} \]
  3. Step-by-step derivation
    1. log1p-expm1-u 99.1%

      \[\leadsto \mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot \left(maxCos \cdot \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right)\right)\right)\right)} \cdot \left(\cos \color{blue}{\left(\mathsf{log1p}\left(\mathsf{expm1}\left(uy \cdot \left(2 \cdot \pi\right)\right)\right)\right)} \cdot xi + \sin \left(uy \cdot \left(2 \cdot \pi\right)\right) \cdot yi\right)\right) \]
    2. associate-*r* 99.1%

      \[\leadsto \mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot \left(maxCos \cdot \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right)\right)\right)\right)} \cdot \left(\cos \left(\mathsf{log1p}\left(\mathsf{expm1}\left(\color{blue}{\left(uy \cdot 2\right) \cdot \pi}\right)\right)\right) \cdot xi + \sin \left(uy \cdot \left(2 \cdot \pi\right)\right) \cdot yi\right)\right) \]
    3. *-commutative 99.1%

      \[\leadsto \mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot \left(maxCos \cdot \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right)\right)\right)\right)} \cdot \left(\cos \left(\mathsf{log1p}\left(\mathsf{expm1}\left(\color{blue}{\pi \cdot \left(uy \cdot 2\right)}\right)\right)\right) \cdot xi + \sin \left(uy \cdot \left(2 \cdot \pi\right)\right) \cdot yi\right)\right) \]
  4. Applied egg-rr 99.1%

    \[\leadsto \mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot \left(maxCos \cdot \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right)\right)\right)\right)} \cdot \left(\cos \color{blue}{\left(\mathsf{log1p}\left(\mathsf{expm1}\left(\pi \cdot \left(uy \cdot 2\right)\right)\right)\right)} \cdot xi + \sin \left(uy \cdot \left(2 \cdot \pi\right)\right) \cdot yi\right)\right) \]
  5. Final simplification 99.1%

    \[\leadsto \mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot \left(maxCos \cdot \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right)\right)\right)\right)} \cdot \left(\cos \left(\mathsf{log1p}\left(\mathsf{expm1}\left(\pi \cdot \left(uy \cdot 2\right)\right)\right)\right) \cdot xi + \sin \left(uy \cdot \left(\pi \cdot 2\right)\right) \cdot yi\right)\right) \]

Alternative 2: 98.9% accurate, 1.0× speedup?

\[\begin{array}{l} \\ \begin{array}{l} t_0 := uy \cdot \left(\pi \cdot 2\right)\\ \mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot \left(maxCos \cdot \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right)\right)\right)\right)} \cdot \left(\sin t_0 \cdot yi + xi \cdot \cos t_0\right)\right) \end{array} \end{array} \]
(FPCore (xi yi zi ux uy maxCos)
 :precision binary32
 (let* ((t_0 (* uy (* PI 2.0))))
   (fma
    ux
    (* (- 1.0 ux) (* maxCos zi))
    (*
     (sqrt
      (- 1.0 (* ux (* ux (* maxCos (* maxCos (* (- 1.0 ux) (- 1.0 ux))))))))
     (+ (* (sin t_0) yi) (* xi (cos t_0)))))))
float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
	float t_0 = uy * (((float) M_PI) * 2.0f);
	return fmaf(ux, ((1.0f - ux) * (maxCos * zi)), (sqrtf((1.0f - (ux * (ux * (maxCos * (maxCos * ((1.0f - ux) * (1.0f - ux)))))))) * ((sinf(t_0) * yi) + (xi * cosf(t_0)))));
}
function code(xi, yi, zi, ux, uy, maxCos)
	t_0 = Float32(uy * Float32(Float32(pi) * Float32(2.0)))
	return fma(ux, Float32(Float32(Float32(1.0) - ux) * Float32(maxCos * zi)), Float32(sqrt(Float32(Float32(1.0) - Float32(ux * Float32(ux * Float32(maxCos * Float32(maxCos * Float32(Float32(Float32(1.0) - ux) * Float32(Float32(1.0) - ux)))))))) * Float32(Float32(sin(t_0) * yi) + Float32(xi * cos(t_0)))))
end
\begin{array}{l}

\\
\begin{array}{l}
t_0 := uy \cdot \left(\pi \cdot 2\right)\\
\mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot \left(maxCos \cdot \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right)\right)\right)\right)} \cdot \left(\sin t_0 \cdot yi + xi \cdot \cos t_0\right)\right)
\end{array}
\end{array}
Derivation
  1. Initial program 99.0%

    \[\left(\left(\cos \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot xi + \left(\sin \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot yi\right) + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
  2. Simplified 99.1%

    \[\leadsto \color{blue}{\mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot \left(maxCos \cdot \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right)\right)\right)\right)} \cdot \left(\cos \left(uy \cdot \left(2 \cdot \pi\right)\right) \cdot xi + \sin \left(uy \cdot \left(2 \cdot \pi\right)\right) \cdot yi\right)\right)} \]
  3. Final simplification 99.1%

    \[\leadsto \mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot \left(maxCos \cdot \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right)\right)\right)\right)} \cdot \left(\sin \left(uy \cdot \left(\pi \cdot 2\right)\right) \cdot yi + xi \cdot \cos \left(uy \cdot \left(\pi \cdot 2\right)\right)\right)\right) \]

Alternative 3: 98.7% accurate, 1.0× speedup?

\[\begin{array}{l} \\ \begin{array}{l} t_0 := uy \cdot \left(\pi \cdot 2\right)\\ \mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \left(\sin t_0 \cdot yi + xi \cdot \cos t_0\right) \cdot \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot \left(maxCos \cdot \left(ux \cdot ux\right)\right)\right)\right)}\right) \end{array} \end{array} \]
(FPCore (xi yi zi ux uy maxCos)
 :precision binary32
 (let* ((t_0 (* uy (* PI 2.0))))
   (fma
    ux
    (* (- 1.0 ux) (* maxCos zi))
    (*
     (+ (* (sin t_0) yi) (* xi (cos t_0)))
     (sqrt (- 1.0 (* ux (* ux (* maxCos (* maxCos (* ux ux)))))))))))
float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
	float t_0 = uy * (((float) M_PI) * 2.0f);
	return fmaf(ux, ((1.0f - ux) * (maxCos * zi)), (((sinf(t_0) * yi) + (xi * cosf(t_0))) * sqrtf((1.0f - (ux * (ux * (maxCos * (maxCos * (ux * ux)))))))));
}
function code(xi, yi, zi, ux, uy, maxCos)
	t_0 = Float32(uy * Float32(Float32(pi) * Float32(2.0)))
	return fma(ux, Float32(Float32(Float32(1.0) - ux) * Float32(maxCos * zi)), Float32(Float32(Float32(sin(t_0) * yi) + Float32(xi * cos(t_0))) * sqrt(Float32(Float32(1.0) - Float32(ux * Float32(ux * Float32(maxCos * Float32(maxCos * Float32(ux * ux)))))))))
end
\begin{array}{l}

\\
\begin{array}{l}
t_0 := uy \cdot \left(\pi \cdot 2\right)\\
\mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \left(\sin t_0 \cdot yi + xi \cdot \cos t_0\right) \cdot \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot \left(maxCos \cdot \left(ux \cdot ux\right)\right)\right)\right)}\right)
\end{array}
\end{array}
Derivation
  1. Initial program 99.0%

    \[\left(\left(\cos \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot xi + \left(\sin \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot yi\right) + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
  2. Simplified 99.1%

    \[\leadsto \color{blue}{\mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot \left(maxCos \cdot \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right)\right)\right)\right)} \cdot \left(\cos \left(uy \cdot \left(2 \cdot \pi\right)\right) \cdot xi + \sin \left(uy \cdot \left(2 \cdot \pi\right)\right) \cdot yi\right)\right)} \]
  3. Taylor expanded in ux around inf 98.7%

    \[\leadsto \mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot \left(maxCos \cdot \color{blue}{{ux}^{2}}\right)\right)\right)} \cdot \left(\cos \left(uy \cdot \left(2 \cdot \pi\right)\right) \cdot xi + \sin \left(uy \cdot \left(2 \cdot \pi\right)\right) \cdot yi\right)\right) \]
  4. Step-by-step derivation
    1. unpow2 98.7%

      \[\leadsto \mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot \left(maxCos \cdot \color{blue}{\left(ux \cdot ux\right)}\right)\right)\right)} \cdot \left(\cos \left(uy \cdot \left(2 \cdot \pi\right)\right) \cdot xi + \sin \left(uy \cdot \left(2 \cdot \pi\right)\right) \cdot yi\right)\right) \]
  5. Simplified 98.7%

    \[\leadsto \mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot \left(maxCos \cdot \color{blue}{\left(ux \cdot ux\right)}\right)\right)\right)} \cdot \left(\cos \left(uy \cdot \left(2 \cdot \pi\right)\right) \cdot xi + \sin \left(uy \cdot \left(2 \cdot \pi\right)\right) \cdot yi\right)\right) \]
  6. Final simplification 98.7%

    \[\leadsto \mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \left(\sin \left(uy \cdot \left(\pi \cdot 2\right)\right) \cdot yi + xi \cdot \cos \left(uy \cdot \left(\pi \cdot 2\right)\right)\right) \cdot \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot \left(maxCos \cdot \left(ux \cdot ux\right)\right)\right)\right)}\right) \]

Alternative 4: 98.8% accurate, 1.2× speedup?

\[\begin{array}{l} \\ \begin{array}{l} t_0 := ux \cdot \left(\left(1 - ux\right) \cdot maxCos\right)\\ \left(xi \cdot \left(\cos \left(\pi \cdot \left(uy \cdot 2\right)\right) \cdot \sqrt{1 + t_0 \cdot \left(ux \cdot \left(maxCos \cdot \left(ux + -1\right)\right)\right)}\right) + yi \cdot \sin \left(2 \cdot \left(\pi \cdot uy\right)\right)\right) + zi \cdot t_0 \end{array} \end{array} \]
(FPCore (xi yi zi ux uy maxCos)
 :precision binary32
 (let* ((t_0 (* ux (* (- 1.0 ux) maxCos))))
   (+
    (+
     (*
      xi
      (*
       (cos (* PI (* uy 2.0)))
       (sqrt (+ 1.0 (* t_0 (* ux (* maxCos (+ ux -1.0))))))))
     (* yi (sin (* 2.0 (* PI uy)))))
    (* zi t_0))))
float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
	float t_0 = ux * ((1.0f - ux) * maxCos);
	return ((xi * (cosf((((float) M_PI) * (uy * 2.0f))) * sqrtf((1.0f + (t_0 * (ux * (maxCos * (ux + -1.0f)))))))) + (yi * sinf((2.0f * (((float) M_PI) * uy))))) + (zi * t_0);
}
function code(xi, yi, zi, ux, uy, maxCos)
	t_0 = Float32(ux * Float32(Float32(Float32(1.0) - ux) * maxCos))
	return Float32(Float32(Float32(xi * Float32(cos(Float32(Float32(pi) * Float32(uy * Float32(2.0)))) * sqrt(Float32(Float32(1.0) + Float32(t_0 * Float32(ux * Float32(maxCos * Float32(ux + Float32(-1.0))))))))) + Float32(yi * sin(Float32(Float32(2.0) * Float32(Float32(pi) * uy))))) + Float32(zi * t_0))
end
function tmp = code(xi, yi, zi, ux, uy, maxCos)
	t_0 = ux * ((single(1.0) - ux) * maxCos);
	tmp = ((xi * (cos((single(pi) * (uy * single(2.0)))) * sqrt((single(1.0) + (t_0 * (ux * (maxCos * (ux + single(-1.0))))))))) + (yi * sin((single(2.0) * (single(pi) * uy))))) + (zi * t_0);
end
\begin{array}{l}

\\
\begin{array}{l}
t_0 := ux \cdot \left(\left(1 - ux\right) \cdot maxCos\right)\\
\left(xi \cdot \left(\cos \left(\pi \cdot \left(uy \cdot 2\right)\right) \cdot \sqrt{1 + t_0 \cdot \left(ux \cdot \left(maxCos \cdot \left(ux + -1\right)\right)\right)}\right) + yi \cdot \sin \left(2 \cdot \left(\pi \cdot uy\right)\right)\right) + zi \cdot t_0
\end{array}
\end{array}
Derivation
  1. Initial program 99.0%

    \[\left(\left(\cos \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot xi + \left(\sin \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot yi\right) + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
  2. Taylor expanded in ux around 0 98.6%

    \[\leadsto \left(\left(\cos \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot xi + \left(\sin \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \color{blue}{{maxCos}^{2} \cdot {ux}^{2}}}\right) \cdot yi\right) + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
  3. Step-by-step derivation
    1. *-commutative 98.6%

      \[\leadsto \left(\left(\cos \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot xi + \left(\sin \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \color{blue}{{ux}^{2} \cdot {maxCos}^{2}}}\right) \cdot yi\right) + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
    2. unpow2 98.6%

      \[\leadsto \left(\left(\cos \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot xi + \left(\sin \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \color{blue}{\left(ux \cdot ux\right)} \cdot {maxCos}^{2}}\right) \cdot yi\right) + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
    3. unpow2 98.6%

      \[\leadsto \left(\left(\cos \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot xi + \left(\sin \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(ux \cdot ux\right) \cdot \color{blue}{\left(maxCos \cdot maxCos\right)}}\right) \cdot yi\right) + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
  4. Simplified 98.6%

    \[\leadsto \left(\left(\cos \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot xi + \left(\sin \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \color{blue}{\left(ux \cdot ux\right) \cdot \left(maxCos \cdot maxCos\right)}}\right) \cdot yi\right) + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
  5. Taylor expanded in ux around 0 98.7%

    \[\leadsto \left(\left(\cos \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot xi + \color{blue}{yi \cdot \sin \left(2 \cdot \left(uy \cdot \pi\right)\right)}\right) + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
  6. Final simplification 98.7%

    \[\leadsto \left(xi \cdot \left(\cos \left(\pi \cdot \left(uy \cdot 2\right)\right) \cdot \sqrt{1 + \left(ux \cdot \left(\left(1 - ux\right) \cdot maxCos\right)\right) \cdot \left(ux \cdot \left(maxCos \cdot \left(ux + -1\right)\right)\right)}\right) + yi \cdot \sin \left(2 \cdot \left(\pi \cdot uy\right)\right)\right) + zi \cdot \left(ux \cdot \left(\left(1 - ux\right) \cdot maxCos\right)\right) \]

Alternative 5: 90.2% accurate, 1.2× speedup?

\[\begin{array}{l} \\ \mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot \left(maxCos \cdot \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right)\right)\right)\right)} \cdot \left(xi \cdot \cos \left(uy \cdot \left(\pi \cdot 2\right)\right) + 2 \cdot \left(uy \cdot \left(\pi \cdot yi\right)\right)\right)\right) \end{array} \]
(FPCore (xi yi zi ux uy maxCos)
 :precision binary32
 (fma
  ux
  (* (- 1.0 ux) (* maxCos zi))
  (*
   (sqrt (- 1.0 (* ux (* ux (* maxCos (* maxCos (* (- 1.0 ux) (- 1.0 ux))))))))
   (+ (* xi (cos (* uy (* PI 2.0)))) (* 2.0 (* uy (* PI yi)))))))
float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
	return fmaf(ux, ((1.0f - ux) * (maxCos * zi)), (sqrtf((1.0f - (ux * (ux * (maxCos * (maxCos * ((1.0f - ux) * (1.0f - ux)))))))) * ((xi * cosf((uy * (((float) M_PI) * 2.0f)))) + (2.0f * (uy * (((float) M_PI) * yi))))));
}
function code(xi, yi, zi, ux, uy, maxCos)
	return fma(ux, Float32(Float32(Float32(1.0) - ux) * Float32(maxCos * zi)), Float32(sqrt(Float32(Float32(1.0) - Float32(ux * Float32(ux * Float32(maxCos * Float32(maxCos * Float32(Float32(Float32(1.0) - ux) * Float32(Float32(1.0) - ux)))))))) * Float32(Float32(xi * cos(Float32(uy * Float32(Float32(pi) * Float32(2.0))))) + Float32(Float32(2.0) * Float32(uy * Float32(Float32(pi) * yi))))))
end
\begin{array}{l}

\\
\mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot \left(maxCos \cdot \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right)\right)\right)\right)} \cdot \left(xi \cdot \cos \left(uy \cdot \left(\pi \cdot 2\right)\right) + 2 \cdot \left(uy \cdot \left(\pi \cdot yi\right)\right)\right)\right)
\end{array}
Derivation
  1. Initial program 99.0%

    \[\left(\left(\cos \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot xi + \left(\sin \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot yi\right) + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
  2. Simplified 99.1%

    \[\leadsto \color{blue}{\mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot \left(maxCos \cdot \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right)\right)\right)\right)} \cdot \left(\cos \left(uy \cdot \left(2 \cdot \pi\right)\right) \cdot xi + \sin \left(uy \cdot \left(2 \cdot \pi\right)\right) \cdot yi\right)\right)} \]
  3. Taylor expanded in uy around 0 91.4%

    \[\leadsto \mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot \left(maxCos \cdot \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right)\right)\right)\right)} \cdot \left(\cos \left(uy \cdot \left(2 \cdot \pi\right)\right) \cdot xi + \color{blue}{2 \cdot \left(uy \cdot \left(yi \cdot \pi\right)\right)}\right)\right) \]
  4. Final simplification 91.4%

    \[\leadsto \mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot \left(maxCos \cdot \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right)\right)\right)\right)} \cdot \left(xi \cdot \cos \left(uy \cdot \left(\pi \cdot 2\right)\right) + 2 \cdot \left(uy \cdot \left(\pi \cdot yi\right)\right)\right)\right) \]

Alternative 6: 90.2% accurate, 1.2× speedup?

\[\begin{array}{l} \\ \mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot \left(maxCos \cdot \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right)\right)\right)\right)} \cdot \left(xi \cdot \cos \left(uy \cdot \left(\pi \cdot 2\right)\right) + 2 \cdot \left(yi \cdot \left(\pi \cdot uy\right)\right)\right)\right) \end{array} \]
(FPCore (xi yi zi ux uy maxCos)
 :precision binary32
 (fma
  ux
  (* (- 1.0 ux) (* maxCos zi))
  (*
   (sqrt (- 1.0 (* ux (* ux (* maxCos (* maxCos (* (- 1.0 ux) (- 1.0 ux))))))))
   (+ (* xi (cos (* uy (* PI 2.0)))) (* 2.0 (* yi (* PI uy)))))))
float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
	return fmaf(ux, ((1.0f - ux) * (maxCos * zi)), (sqrtf((1.0f - (ux * (ux * (maxCos * (maxCos * ((1.0f - ux) * (1.0f - ux)))))))) * ((xi * cosf((uy * (((float) M_PI) * 2.0f)))) + (2.0f * (yi * (((float) M_PI) * uy))))));
}
function code(xi, yi, zi, ux, uy, maxCos)
	return fma(ux, Float32(Float32(Float32(1.0) - ux) * Float32(maxCos * zi)), Float32(sqrt(Float32(Float32(1.0) - Float32(ux * Float32(ux * Float32(maxCos * Float32(maxCos * Float32(Float32(Float32(1.0) - ux) * Float32(Float32(1.0) - ux)))))))) * Float32(Float32(xi * cos(Float32(uy * Float32(Float32(pi) * Float32(2.0))))) + Float32(Float32(2.0) * Float32(yi * Float32(Float32(pi) * uy))))))
end
\begin{array}{l}

\\
\mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot \left(maxCos \cdot \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right)\right)\right)\right)} \cdot \left(xi \cdot \cos \left(uy \cdot \left(\pi \cdot 2\right)\right) + 2 \cdot \left(yi \cdot \left(\pi \cdot uy\right)\right)\right)\right)
\end{array}
Derivation
  1. Initial program 99.0%

    \[\left(\left(\cos \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot xi + \left(\sin \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot yi\right) + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
  2. Simplified 99.1%

    \[\leadsto \color{blue}{\mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot \left(maxCos \cdot \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right)\right)\right)\right)} \cdot \left(\cos \left(uy \cdot \left(2 \cdot \pi\right)\right) \cdot xi + \sin \left(uy \cdot \left(2 \cdot \pi\right)\right) \cdot yi\right)\right)} \]
  3. Taylor expanded in uy around 0 91.4%

    \[\leadsto \mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot \left(maxCos \cdot \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right)\right)\right)\right)} \cdot \left(\cos \left(uy \cdot \left(2 \cdot \pi\right)\right) \cdot xi + \color{blue}{2 \cdot \left(uy \cdot \left(yi \cdot \pi\right)\right)}\right)\right) \]
  4. Taylor expanded in uy around 0 91.4%

    \[\leadsto \mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot \left(maxCos \cdot \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right)\right)\right)\right)} \cdot \left(\cos \left(uy \cdot \left(2 \cdot \pi\right)\right) \cdot xi + 2 \cdot \color{blue}{\left(yi \cdot \left(uy \cdot \pi\right)\right)}\right)\right) \]
  5. Final simplification 91.4%

    \[\leadsto \mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot \left(maxCos \cdot \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right)\right)\right)\right)} \cdot \left(xi \cdot \cos \left(uy \cdot \left(\pi \cdot 2\right)\right) + 2 \cdot \left(yi \cdot \left(\pi \cdot uy\right)\right)\right)\right) \]

Alternative 7: 89.9% accurate, 1.2× speedup?

\[\begin{array}{l} \\ \mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot \left(maxCos \cdot \left(ux \cdot ux\right)\right)\right)\right)} \cdot \left(xi \cdot \cos \left(uy \cdot \left(\pi \cdot 2\right)\right) + 2 \cdot \left(uy \cdot \left(\pi \cdot yi\right)\right)\right)\right) \end{array} \]
(FPCore (xi yi zi ux uy maxCos)
 :precision binary32
 (fma
  ux
  (* (- 1.0 ux) (* maxCos zi))
  (*
   (sqrt (- 1.0 (* ux (* ux (* maxCos (* maxCos (* ux ux)))))))
   (+ (* xi (cos (* uy (* PI 2.0)))) (* 2.0 (* uy (* PI yi)))))))
/*
 * Alternative 7, single precision.
 *
 * Evaluates fmaf(ux, (1 - ux) * (maxCos * zi), root * planar), where
 *   root   = sqrtf(1 - ux * (ux * (maxCos * (maxCos * (ux * ux)))))
 *   planar = xi * cosf(uy * (pi * 2)) + 2 * (uy * (pi * yi))
 * The yi term is linear in uy (no sinf call), and the square-root argument
 * uses ux * ux as its innermost factor.
 */
float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
	const float pi_f = (float) M_PI;
	const float z_scale = (1.0f - ux) * (maxCos * zi);
	const float root = sqrtf(1.0f - (ux * (ux * (maxCos * (maxCos * (ux * ux))))));
	const float planar = (xi * cosf(uy * (pi_f * 2.0f))) + (2.0f * (uy * (pi_f * yi)));
	/* Fused step: ux * z_scale + (root * planar). */
	return fmaf(ux, z_scale, root * planar);
}
# Alternative 7 with every intermediate rounded to Float32.
#
# Returns fma(ux, z_scale, root * planar), where
#   z_scale = (1 - ux) * (maxCos * zi)
#   root    = sqrt(1 - ux * (ux * (maxCos * (maxCos * (ux * ux)))))
#   planar  = xi * cos(uy * (pi * 2)) + 2 * (uy * (pi * yi))
function code(xi, yi, zi, ux, uy, maxCos)
	one32 = Float32(1.0)
	two32 = Float32(2.0)
	pi32 = Float32(pi)
	z_scale = Float32(Float32(one32 - ux) * Float32(maxCos * zi))
	under_root = Float32(one32 - Float32(ux * Float32(ux * Float32(maxCos * Float32(maxCos * Float32(ux * ux))))))
	angle = Float32(uy * Float32(pi32 * two32))
	planar = Float32(Float32(xi * cos(angle)) + Float32(two32 * Float32(uy * Float32(pi32 * yi))))
	return fma(ux, z_scale, Float32(sqrt(under_root) * planar))
end
\begin{array}{l}

\\
\mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot \left(maxCos \cdot \left(ux \cdot ux\right)\right)\right)\right)} \cdot \left(xi \cdot \cos \left(uy \cdot \left(\pi \cdot 2\right)\right) + 2 \cdot \left(uy \cdot \left(\pi \cdot yi\right)\right)\right)\right)
\end{array}
Derivation
  1. Initial program 99.0%

    \[\left(\left(\cos \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot xi + \left(\sin \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot yi\right) + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
  2. Simplified 99.1%

    \[\leadsto \color{blue}{\mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot \left(maxCos \cdot \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right)\right)\right)\right)} \cdot \left(\cos \left(uy \cdot \left(2 \cdot \pi\right)\right) \cdot xi + \sin \left(uy \cdot \left(2 \cdot \pi\right)\right) \cdot yi\right)\right)} \]
  3. Taylor expanded in uy around 0 91.4%

    \[\leadsto \mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot \left(maxCos \cdot \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right)\right)\right)\right)} \cdot \left(\cos \left(uy \cdot \left(2 \cdot \pi\right)\right) \cdot xi + \color{blue}{2 \cdot \left(uy \cdot \left(yi \cdot \pi\right)\right)}\right)\right) \]
  4. Taylor expanded in ux around inf 91.3%

    \[\leadsto \mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot \left(maxCos \cdot \color{blue}{{ux}^{2}}\right)\right)\right)} \cdot \left(\cos \left(uy \cdot \left(2 \cdot \pi\right)\right) \cdot xi + 2 \cdot \left(uy \cdot \left(yi \cdot \pi\right)\right)\right)\right) \]
  5. Step-by-step derivation
    1. unpow2 91.3%

      \[\leadsto \mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot \left(maxCos \cdot \color{blue}{\left(ux \cdot ux\right)}\right)\right)\right)} \cdot \left(\cos \left(uy \cdot \left(2 \cdot \pi\right)\right) \cdot xi + 2 \cdot \left(uy \cdot \left(yi \cdot \pi\right)\right)\right)\right) \]
  6. Simplified 91.3%

    \[\leadsto \mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot \left(maxCos \cdot \color{blue}{\left(ux \cdot ux\right)}\right)\right)\right)} \cdot \left(\cos \left(uy \cdot \left(2 \cdot \pi\right)\right) \cdot xi + 2 \cdot \left(uy \cdot \left(yi \cdot \pi\right)\right)\right)\right) \]
  7. Final simplification 91.3%

    \[\leadsto \mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot \left(maxCos \cdot \left(ux \cdot ux\right)\right)\right)\right)} \cdot \left(xi \cdot \cos \left(uy \cdot \left(\pi \cdot 2\right)\right) + 2 \cdot \left(uy \cdot \left(\pi \cdot yi\right)\right)\right)\right) \]

Alternative 8: 90.0% accurate, 1.2× speedup?

\[\begin{array}{l} \\ \mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \left(xi \cdot \cos \left(uy \cdot \left(\pi \cdot 2\right)\right) + 2 \cdot \left(uy \cdot \left(\pi \cdot yi\right)\right)\right) \cdot \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot maxCos\right)\right)}\right) \end{array} \]
; Alternative 8 (binary32): fma(ux, (1 - ux) * (maxCos * zi), planar * root).
; Per the derivation in this report, the sine factor is replaced by its
; first-order term in uy, 2 * (uy * (PI * yi)), and the (1 - ux) * (1 - ux)
; factor under the square root is replaced by 1 (Taylor expansion in ux around 0).
(FPCore (xi yi zi ux uy maxCos)
 :precision binary32
 (fma
  ux
  (* (- 1.0 ux) (* maxCos zi))
  (*
   ; planar: cosine term scaled by xi plus the term linear in uy scaled by yi
   (+ (* xi (cos (* uy (* PI 2.0)))) (* 2.0 (* uy (* PI yi))))
   ; root: sqrt(1 - ux * (ux * (maxCos * maxCos)))
   (sqrt (- 1.0 (* ux (* ux (* maxCos maxCos))))))))
/*
 * Alternative 8, single precision.
 *
 * Evaluates fmaf(ux, (1 - ux) * (maxCos * zi), planar * root), where
 *   planar = xi * cosf(uy * (pi * 2)) + 2 * (uy * (pi * yi))
 *   root   = sqrtf(1 - ux * (ux * (maxCos * maxCos)))
 * The yi term is linear in uy (no sinf call).
 */
float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
	const float pi_f = (float) M_PI;
	const float z_scale = (1.0f - ux) * (maxCos * zi);
	const float planar = (xi * cosf(uy * (pi_f * 2.0f))) + (2.0f * (uy * (pi_f * yi)));
	const float root = sqrtf(1.0f - (ux * (ux * (maxCos * maxCos))));
	/* Fused step: ux * z_scale + (planar * root). */
	return fmaf(ux, z_scale, planar * root);
}
# Alternative 8 with every intermediate rounded to Float32.
#
# Returns fma(ux, z_scale, planar * root), where
#   z_scale = (1 - ux) * (maxCos * zi)
#   planar  = xi * cos(uy * (pi * 2)) + 2 * (uy * (pi * yi))
#   root    = sqrt(1 - ux * (ux * (maxCos * maxCos)))
function code(xi, yi, zi, ux, uy, maxCos)
	one32 = Float32(1.0)
	two32 = Float32(2.0)
	pi32 = Float32(pi)
	z_scale = Float32(Float32(one32 - ux) * Float32(maxCos * zi))
	angle = Float32(uy * Float32(pi32 * two32))
	planar = Float32(Float32(xi * cos(angle)) + Float32(two32 * Float32(uy * Float32(pi32 * yi))))
	under_root = Float32(one32 - Float32(ux * Float32(ux * Float32(maxCos * maxCos))))
	return fma(ux, z_scale, Float32(planar * sqrt(under_root)))
end
\begin{array}{l}

\\
\mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \left(xi \cdot \cos \left(uy \cdot \left(\pi \cdot 2\right)\right) + 2 \cdot \left(uy \cdot \left(\pi \cdot yi\right)\right)\right) \cdot \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot maxCos\right)\right)}\right)
\end{array}
Derivation
  1. Initial program 99.0%

    \[\left(\left(\cos \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot xi + \left(\sin \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot yi\right) + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
  2. Simplified 99.1%

    \[\leadsto \color{blue}{\mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot \left(maxCos \cdot \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right)\right)\right)\right)} \cdot \left(\cos \left(uy \cdot \left(2 \cdot \pi\right)\right) \cdot xi + \sin \left(uy \cdot \left(2 \cdot \pi\right)\right) \cdot yi\right)\right)} \]
  3. Taylor expanded in uy around 0 91.4%

    \[\leadsto \mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot \left(maxCos \cdot \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right)\right)\right)\right)} \cdot \left(\cos \left(uy \cdot \left(2 \cdot \pi\right)\right) \cdot xi + \color{blue}{2 \cdot \left(uy \cdot \left(yi \cdot \pi\right)\right)}\right)\right) \]
  4. Taylor expanded in ux around 0 91.3%

    \[\leadsto \mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot \left(maxCos \cdot \color{blue}{1}\right)\right)\right)} \cdot \left(\cos \left(uy \cdot \left(2 \cdot \pi\right)\right) \cdot xi + 2 \cdot \left(uy \cdot \left(yi \cdot \pi\right)\right)\right)\right) \]
  5. Final simplification 91.3%

    \[\leadsto \mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \left(xi \cdot \cos \left(uy \cdot \left(\pi \cdot 2\right)\right) + 2 \cdot \left(uy \cdot \left(\pi \cdot yi\right)\right)\right) \cdot \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot maxCos\right)\right)}\right) \]

Alternative 9: 90.0% accurate, 1.2× speedup?

\[\begin{array}{l} \\ \mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \left(xi \cdot \cos \left(uy \cdot \left(\pi \cdot 2\right)\right) + 2 \cdot \left(yi \cdot \left(\pi \cdot uy\right)\right)\right) \cdot \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot maxCos\right)\right)}\right) \end{array} \]
; Alternative 9 (binary32): fma(ux, (1 - ux) * (maxCos * zi), planar * root).
; Per the derivation in this report, the sine factor is replaced by its
; first-order term in uy, grouped here as 2 * (yi * (PI * uy)), and the
; (1 - ux) * (1 - ux) factor under the square root is replaced by 1
; (Taylor expansion in ux around 0).
(FPCore (xi yi zi ux uy maxCos)
 :precision binary32
 (fma
  ux
  (* (- 1.0 ux) (* maxCos zi))
  (*
   ; planar: cosine term scaled by xi plus the term linear in uy scaled by yi
   (+ (* xi (cos (* uy (* PI 2.0)))) (* 2.0 (* yi (* PI uy))))
   ; root: sqrt(1 - ux * (ux * (maxCos * maxCos)))
   (sqrt (- 1.0 (* ux (* ux (* maxCos maxCos))))))))
/*
 * Alternative 9, single precision.
 *
 * Evaluates fmaf(ux, (1 - ux) * (maxCos * zi), planar * root), where
 *   planar = xi * cosf(uy * (pi * 2)) + 2 * (yi * (pi * uy))
 *   root   = sqrtf(1 - ux * (ux * (maxCos * maxCos)))
 * The yi term is linear in uy (no sinf call) and multiplies pi by uy first.
 */
float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
	const float pi_f = (float) M_PI;
	const float z_scale = (1.0f - ux) * (maxCos * zi);
	const float planar = (xi * cosf(uy * (pi_f * 2.0f))) + (2.0f * (yi * (pi_f * uy)));
	const float root = sqrtf(1.0f - (ux * (ux * (maxCos * maxCos))));
	/* Fused step: ux * z_scale + (planar * root). */
	return fmaf(ux, z_scale, planar * root);
}
# Alternative 9 with every intermediate rounded to Float32.
#
# Returns fma(ux, z_scale, planar * root), where
#   z_scale = (1 - ux) * (maxCos * zi)
#   planar  = xi * cos(uy * (pi * 2)) + 2 * (yi * (pi * uy))
#   root    = sqrt(1 - ux * (ux * (maxCos * maxCos)))
function code(xi, yi, zi, ux, uy, maxCos)
	one32 = Float32(1.0)
	two32 = Float32(2.0)
	pi32 = Float32(pi)
	z_scale = Float32(Float32(one32 - ux) * Float32(maxCos * zi))
	angle = Float32(uy * Float32(pi32 * two32))
	planar = Float32(Float32(xi * cos(angle)) + Float32(two32 * Float32(yi * Float32(pi32 * uy))))
	under_root = Float32(one32 - Float32(ux * Float32(ux * Float32(maxCos * maxCos))))
	return fma(ux, z_scale, Float32(planar * sqrt(under_root)))
end
\begin{array}{l}

\\
\mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \left(xi \cdot \cos \left(uy \cdot \left(\pi \cdot 2\right)\right) + 2 \cdot \left(yi \cdot \left(\pi \cdot uy\right)\right)\right) \cdot \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot maxCos\right)\right)}\right)
\end{array}
Derivation
  1. Initial program 99.0%

    \[\left(\left(\cos \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot xi + \left(\sin \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right)}\right) \cdot yi\right) + \left(\left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\right) \cdot zi \]
  2. Simplified 99.1%

    \[\leadsto \color{blue}{\mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot \left(maxCos \cdot \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right)\right)\right)\right)} \cdot \left(\cos \left(uy \cdot \left(2 \cdot \pi\right)\right) \cdot xi + \sin \left(uy \cdot \left(2 \cdot \pi\right)\right) \cdot yi\right)\right)} \]
  3. Taylor expanded in uy around 0 91.4%

    \[\leadsto \mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot \left(maxCos \cdot \left(\left(1 - ux\right) \cdot \left(1 - ux\right)\right)\right)\right)\right)} \cdot \left(\cos \left(uy \cdot \left(2 \cdot \pi\right)\right) \cdot xi + \color{blue}{2 \cdot \left(uy \cdot \left(yi \cdot \pi\right)\right)}\right)\right) \]
  4. Taylor expanded in ux around 0 91.3%

    \[\leadsto \mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot \left(maxCos \cdot \color{blue}{1}\right)\right)\right)} \cdot \left(\cos \left(uy \cdot \left(2 \cdot \pi\right)\right) \cdot xi + 2 \cdot \left(uy \cdot \left(yi \cdot \pi\right)\right)\right)\right) \]
  5. Taylor expanded in uy around 0 91.3%

    \[\leadsto \mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot \left(maxCos \cdot 1\right)\right)\right)} \cdot \left(\cos \left(uy \cdot \left(2 \cdot \pi\right)\right) \cdot xi + 2 \cdot \color{blue}{\left(yi \cdot \left(uy \cdot \pi\right)\right)}\right)\right) \]
  6. Final simplification 91.3%

    \[\leadsto \mathsf{fma}\left(ux, \left(1 - ux\right) \cdot \left(maxCos \cdot zi\right), \left(xi \cdot \cos \left(uy \cdot \left(\pi \cdot 2\right)\right) + 2 \cdot \left(yi \cdot \left(\pi \cdot uy\right)\right)\right) \cdot \sqrt{1 - ux \cdot \left(ux \cdot \left(maxCos \cdot maxCos\right)\right)}\right) \]

Reproduce

?
herbie shell --seed 2023188 
; Input benchmark: the original (unoptimized) expression, evaluated in binary32.
; The :pre clause restricts xi, yi and zi to [-10000, 10000], ux and uy to
; [2.328306437e-10, 1], and maxCos to [0, 1].
(FPCore (xi yi zi ux uy maxCos)
  :name "UniformSampleCone 2"
  :precision binary32
  :pre (and (and (and (and (and (and (<= -10000.0 xi) (<= xi 10000.0)) (and (<= -10000.0 yi) (<= yi 10000.0))) (and (<= -10000.0 zi) (<= zi 10000.0))) (and (<= 2.328306437e-10 ux) (<= ux 1.0))) (and (<= 2.328306437e-10 uy) (<= uy 1.0))) (and (<= 0.0 maxCos) (<= maxCos 1.0)))
  ; Body: (cos(t2) * t1) * xi + (sin(t2) * t1) * yi + t0 * zi, with
  ; t0 = ((1 - ux) * maxCos) * ux, t1 = sqrt(1 - t0 * t0), t2 = (uy * 2) * PI,
  ; written out with every subexpression repeated inline.
  (+ (+ (* (* (cos (* (* uy 2.0) PI)) (sqrt (- 1.0 (* (* (* (- 1.0 ux) maxCos) ux) (* (* (- 1.0 ux) maxCos) ux))))) xi) (* (* (sin (* (* uy 2.0) PI)) (sqrt (- 1.0 (* (* (* (- 1.0 ux) maxCos) ux) (* (* (- 1.0 ux) maxCos) ux))))) yi)) (* (* (* (- 1.0 ux) maxCos) ux) zi)))