UniformSampleCone 2

Percentage Accurate: 98.9% → 98.9%
Time: 26.8s
Alternatives: 14
Speedup: 1.0×

Specification

?
\[\left(\left(\left(\left(\left(-10000 \leq xi \land xi \leq 10000\right) \land \left(-10000 \leq yi \land yi \leq 10000\right)\right) \land \left(-10000 \leq zi \land zi \leq 10000\right)\right) \land \left(2.328306437 \cdot 10^{-10} \leq ux \land ux \leq 1\right)\right) \land \left(2.328306437 \cdot 10^{-10} \leq uy \land uy \leq 1\right)\right) \land \left(0 \leq maxCos \land maxCos \leq 1\right)\]
\[\begin{array}{l} \\ \begin{array}{l} t_0 := \left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\\ t_1 := \sqrt{1 - t_0 \cdot t_0}\\ t_2 := \left(uy \cdot 2\right) \cdot \pi\\ \left(\left(\cos t_2 \cdot t_1\right) \cdot xi + \left(\sin t_2 \cdot t_1\right) \cdot yi\right) + t_0 \cdot zi \end{array} \end{array} \]
; Specification of "UniformSampleCone 2", evaluated in binary32.
;   t_0 = ((1 - ux) * maxCos) * ux
;   t_1 = sqrt(1 - t_0 * t_0)
;   t_2 = (uy * 2) * PI
; Result: (cos(t_2) * t_1) * xi + (sin(t_2) * t_1) * yi + t_0 * zi
(FPCore (xi yi zi ux uy maxCos)
 :precision binary32
 (let* ((t_0 (* (* (- 1.0 ux) maxCos) ux))
        (t_1 (sqrt (- 1.0 (* t_0 t_0))))
        (t_2 (* (* uy 2.0) PI)))
   (+ (+ (* (* (cos t_2) t_1) xi) (* (* (sin t_2) t_1) yi)) (* t_0 zi))))
/*
 * C rendering of the specification expression, using float arithmetic and
 * the float math functions sqrtf/cosf/sinf.
 * Returns (cos(t_2) * t_1) * xi + (sin(t_2) * t_1) * yi + t_0 * zi.
 */
float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
	float t_0 = ((1.0f - ux) * maxCos) * ux;      /* coefficient applied to zi */
	float t_1 = sqrtf((1.0f - (t_0 * t_0)));     /* sqrt(1 - t_0^2), scales the xi and yi terms */
	float t_2 = (uy * 2.0f) * ((float) M_PI);    /* angle (uy * 2) * pi; M_PI is cast to float */
	return (((cosf(t_2) * t_1) * xi) + ((sinf(t_2) * t_1) * yi)) + (t_0 * zi);
}
# Julia rendering of the specification expression.
# Constants and intermediate products/sums are wrapped in Float32(...).
# Returns (cos(t_2) * t_1) * xi + (sin(t_2) * t_1) * yi + t_0 * zi.
function code(xi, yi, zi, ux, uy, maxCos)
	# t_0 = ((1 - ux) * maxCos) * ux
	t_0 = Float32(Float32(Float32(Float32(1.0) - ux) * maxCos) * ux)
	# t_1 = sqrt(1 - t_0^2)
	t_1 = sqrt(Float32(Float32(1.0) - Float32(t_0 * t_0)))
	# t_2 = (uy * 2) * pi
	t_2 = Float32(Float32(uy * Float32(2.0)) * Float32(pi))
	return Float32(Float32(Float32(Float32(cos(t_2) * t_1) * xi) + Float32(Float32(sin(t_2) * t_1) * yi)) + Float32(t_0 * zi))
end
% MATLAB rendering of the specification expression.
% Literal constants are created with single(...); assumes the inputs are
% also single so the whole expression stays in single precision - TODO confirm.
% Returns tmp = (cos(t_2) * t_1) * xi + (sin(t_2) * t_1) * yi + t_0 * zi.
function tmp = code(xi, yi, zi, ux, uy, maxCos)
	t_0 = ((single(1.0) - ux) * maxCos) * ux;   % coefficient applied to zi
	t_1 = sqrt((single(1.0) - (t_0 * t_0)));    % sqrt(1 - t_0^2)
	t_2 = (uy * single(2.0)) * single(pi);      % angle (uy * 2) * pi
	tmp = (((cos(t_2) * t_1) * xi) + ((sin(t_2) * t_1) * yi)) + (t_0 * zi);
end
\begin{array}{l}

\\
\begin{array}{l}
t_0 := \left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\\
t_1 := \sqrt{1 - t_0 \cdot t_0}\\
t_2 := \left(uy \cdot 2\right) \cdot \pi\\
\left(\left(\cos t_2 \cdot t_1\right) \cdot xi + \left(\sin t_2 \cdot t_1\right) \cdot yi\right) + t_0 \cdot zi
\end{array}
\end{array}

Sampling outcomes in binary32 precision:

Local Percentage Accuracy vs ?

The average percentage accuracy by input value. The horizontal axis shows the value of an input variable; the variable is chosen in the title. The vertical axis is accuracy; higher is better. Red represents the original program, while blue represents Herbie's suggestion. These can be toggled with the buttons below the plot. The line is an average, while the dots represent individual samples.

Accuracy vs Speed?

Herbie found 14 alternatives:

Alternative | Accuracy | Speedup
The accuracy (vertical axis) and speed (horizontal axis) of each alternative. Up and to the right is better. The red square shows the initial program, and each blue circle shows an alternative. The line shows the best available speed-accuracy tradeoffs.

Initial Program: 98.9% accurate, 1.0× speedup?

\[\begin{array}{l} \\ \begin{array}{l} t_0 := \left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\\ t_1 := \sqrt{1 - t_0 \cdot t_0}\\ t_2 := \left(uy \cdot 2\right) \cdot \pi\\ \left(\left(\cos t_2 \cdot t_1\right) \cdot xi + \left(\sin t_2 \cdot t_1\right) \cdot yi\right) + t_0 \cdot zi \end{array} \end{array} \]
; Initial program (the input Herbie starts from), evaluated in binary32.
;   t_0 = ((1 - ux) * maxCos) * ux
;   t_1 = sqrt(1 - t_0 * t_0)
;   t_2 = (uy * 2) * PI
; Result: (cos(t_2) * t_1) * xi + (sin(t_2) * t_1) * yi + t_0 * zi
(FPCore (xi yi zi ux uy maxCos)
 :precision binary32
 (let* ((t_0 (* (* (- 1.0 ux) maxCos) ux))
        (t_1 (sqrt (- 1.0 (* t_0 t_0))))
        (t_2 (* (* uy 2.0) PI)))
   (+ (+ (* (* (cos t_2) t_1) xi) (* (* (sin t_2) t_1) yi)) (* t_0 zi))))
/*
 * Initial program in C, using float arithmetic and sqrtf/cosf/sinf.
 * Returns (cos(t_2) * t_1) * xi + (sin(t_2) * t_1) * yi + t_0 * zi.
 */
float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
	float t_0 = ((1.0f - ux) * maxCos) * ux;      /* coefficient applied to zi */
	float t_1 = sqrtf((1.0f - (t_0 * t_0)));     /* sqrt(1 - t_0^2), scales the xi and yi terms */
	float t_2 = (uy * 2.0f) * ((float) M_PI);    /* angle (uy * 2) * pi; M_PI is cast to float */
	return (((cosf(t_2) * t_1) * xi) + ((sinf(t_2) * t_1) * yi)) + (t_0 * zi);
}
# Initial program in Julia.
# Constants and intermediate products/sums are wrapped in Float32(...).
# Returns (cos(t_2) * t_1) * xi + (sin(t_2) * t_1) * yi + t_0 * zi.
function code(xi, yi, zi, ux, uy, maxCos)
	# t_0 = ((1 - ux) * maxCos) * ux
	t_0 = Float32(Float32(Float32(Float32(1.0) - ux) * maxCos) * ux)
	# t_1 = sqrt(1 - t_0^2)
	t_1 = sqrt(Float32(Float32(1.0) - Float32(t_0 * t_0)))
	# t_2 = (uy * 2) * pi
	t_2 = Float32(Float32(uy * Float32(2.0)) * Float32(pi))
	return Float32(Float32(Float32(Float32(cos(t_2) * t_1) * xi) + Float32(Float32(sin(t_2) * t_1) * yi)) + Float32(t_0 * zi))
end
% Initial program in MATLAB.
% Literal constants are created with single(...); assumes the inputs are
% also single so the whole expression stays in single precision - TODO confirm.
% Returns tmp = (cos(t_2) * t_1) * xi + (sin(t_2) * t_1) * yi + t_0 * zi.
function tmp = code(xi, yi, zi, ux, uy, maxCos)
	t_0 = ((single(1.0) - ux) * maxCos) * ux;   % coefficient applied to zi
	t_1 = sqrt((single(1.0) - (t_0 * t_0)));    % sqrt(1 - t_0^2)
	t_2 = (uy * single(2.0)) * single(pi);      % angle (uy * 2) * pi
	tmp = (((cos(t_2) * t_1) * xi) + ((sin(t_2) * t_1) * yi)) + (t_0 * zi);
end
\begin{array}{l}

\\
\begin{array}{l}
t_0 := \left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\\
t_1 := \sqrt{1 - t_0 \cdot t_0}\\
t_2 := \left(uy \cdot 2\right) \cdot \pi\\
\left(\left(\cos t_2 \cdot t_1\right) \cdot xi + \left(\sin t_2 \cdot t_1\right) \cdot yi\right) + t_0 \cdot zi
\end{array}
\end{array}

Alternative 1: 98.9% accurate, 0.6× speedup?

\[\begin{array}{l} \\ \begin{array}{l} t_0 := ux \cdot \left(\left(1 - ux\right) \cdot maxCos\right)\\ t_1 := \sqrt{1 + t_0 \cdot \left(ux \cdot \left(maxCos \cdot \left(ux + -1\right)\right)\right)}\\ \left(\left(\cos \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot t_1\right) \cdot xi + \left(t_1 \cdot \sin \left(\sqrt[3]{{\left(uy \cdot 2\right)}^{3} \cdot {\pi}^{3}}\right)\right) \cdot yi\right) + t_0 \cdot zi \end{array} \end{array} \]
; Alternative 1 (binary32).
;   t_0 = ux * ((1 - ux) * maxCos)
;   t_1 = sqrt(1 + t_0 * (ux * (maxCos * (ux + -1))))   ; algebraically sqrt(1 - t_0^2)
; The sin argument is written as cbrt((uy * 2)^3 * PI^3), which is
; algebraically (uy * 2) * PI; the cos argument is (uy * 2) * PI directly.
(FPCore (xi yi zi ux uy maxCos)
 :precision binary32
 (let* ((t_0 (* ux (* (- 1.0 ux) maxCos)))
        (t_1 (sqrt (+ 1.0 (* t_0 (* ux (* maxCos (+ ux -1.0))))))))
   (+
    (+
     (* (* (cos (* (* uy 2.0) PI)) t_1) xi)
     (* (* t_1 (sin (cbrt (* (pow (* uy 2.0) 3.0) (pow PI 3.0))))) yi))
    (* t_0 zi))))
/*
 * Alternative 1 in C (float arithmetic).
 * Same three-term sum as the initial program, but the sin argument is
 * computed as cbrtf(powf(uy * 2, 3) * powf(pi, 3)) instead of (uy * 2) * pi.
 */
float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
	float t_0 = ux * ((1.0f - ux) * maxCos);                              /* coefficient applied to zi */
	float t_1 = sqrtf((1.0f + (t_0 * (ux * (maxCos * (ux + -1.0f))))));  /* algebraically sqrt(1 - t_0^2) */
	return (((cosf(((uy * 2.0f) * ((float) M_PI))) * t_1) * xi) + ((t_1 * sinf(cbrtf((powf((uy * 2.0f), 3.0f) * powf(((float) M_PI), 3.0f))))) * yi)) + (t_0 * zi);
}
# Alternative 1 in Julia; constants and intermediates are wrapped in Float32(...).
# Same three-term sum as the initial program, but the sin argument is
# cbrt((uy * 2)^3 * pi^3) instead of (uy * 2) * pi.
function code(xi, yi, zi, ux, uy, maxCos)
	# t_0 = ux * ((1 - ux) * maxCos)
	t_0 = Float32(ux * Float32(Float32(Float32(1.0) - ux) * maxCos))
	# t_1 = sqrt(1 + t_0 * (ux * (maxCos * (ux - 1)))), algebraically sqrt(1 - t_0^2)
	t_1 = sqrt(Float32(Float32(1.0) + Float32(t_0 * Float32(ux * Float32(maxCos * Float32(ux + Float32(-1.0)))))))
	return Float32(Float32(Float32(Float32(cos(Float32(Float32(uy * Float32(2.0)) * Float32(pi))) * t_1) * xi) + Float32(Float32(t_1 * sin(cbrt(Float32((Float32(uy * Float32(2.0)) ^ Float32(3.0)) * (Float32(pi) ^ Float32(3.0)))))) * yi)) + Float32(t_0 * zi))
end
\begin{array}{l}

\\
\begin{array}{l}
t_0 := ux \cdot \left(\left(1 - ux\right) \cdot maxCos\right)\\
t_1 := \sqrt{1 + t_0 \cdot \left(ux \cdot \left(maxCos \cdot \left(ux + -1\right)\right)\right)}\\
\left(\left(\cos \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot t_1\right) \cdot xi + \left(t_1 \cdot \sin \left(\sqrt[3]{{\left(uy \cdot 2\right)}^{3} \cdot {\pi}^{3}}\right)\right) \cdot yi\right) + t_0 \cdot zi
\end{array}
\end{array}
Derivation
    &prev;&pcontext;&pcontext2;&ctx;
  1. Add Preprocessing

Alternative 2: 98.9% accurate, 0.7× speedup?

\[\begin{array}{l} \\ \begin{array}{l} t_0 := ux \cdot \left(\left(1 - ux\right) \cdot maxCos\right)\\ t_1 := \sqrt{1 + t_0 \cdot \left(ux \cdot \left(maxCos \cdot \left(ux + -1\right)\right)\right)}\\ t_0 \cdot zi + \left(xi \cdot \left(t_1 \cdot \cos \log \left(1 + \mathsf{expm1}\left(uy \cdot \left(2 \cdot \pi\right)\right)\right)\right) + yi \cdot \left(t_1 \cdot \sin \left(\left(uy \cdot 2\right) \cdot \pi\right)\right)\right) \end{array} \end{array} \]
; Alternative 2 (binary32).
;   t_0 = ux * ((1 - ux) * maxCos)
;   t_1 = sqrt(1 + t_0 * (ux * (maxCos * (ux + -1))))   ; algebraically sqrt(1 - t_0^2)
; The cos argument is written as log(1 + expm1(uy * (2 * PI))), which is
; algebraically uy * (2 * PI); the sin argument is (uy * 2) * PI directly.
(FPCore (xi yi zi ux uy maxCos)
 :precision binary32
 (let* ((t_0 (* ux (* (- 1.0 ux) maxCos)))
        (t_1 (sqrt (+ 1.0 (* t_0 (* ux (* maxCos (+ ux -1.0))))))))
   (+
    (* t_0 zi)
    (+
     (* xi (* t_1 (cos (log (+ 1.0 (expm1 (* uy (* 2.0 PI))))))))
     (* yi (* t_1 (sin (* (* uy 2.0) PI))))))))
/*
 * Alternative 2 in C (float arithmetic).
 * Same three-term sum as the initial program, but the cos argument is
 * computed as logf(1 + expm1f(uy * (2 * pi))) instead of (uy * 2) * pi.
 */
float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
	float t_0 = ux * ((1.0f - ux) * maxCos);                              /* coefficient applied to zi */
	float t_1 = sqrtf((1.0f + (t_0 * (ux * (maxCos * (ux + -1.0f))))));  /* algebraically sqrt(1 - t_0^2) */
	return (t_0 * zi) + ((xi * (t_1 * cosf(logf((1.0f + expm1f((uy * (2.0f * ((float) M_PI))))))))) + (yi * (t_1 * sinf(((uy * 2.0f) * ((float) M_PI))))));
}
# Alternative 2 in Julia; constants and intermediates are wrapped in Float32(...).
# Same three-term sum as the initial program, but the cos argument is
# log(1 + expm1(uy * (2 * pi))) instead of (uy * 2) * pi.
function code(xi, yi, zi, ux, uy, maxCos)
	# t_0 = ux * ((1 - ux) * maxCos)
	t_0 = Float32(ux * Float32(Float32(Float32(1.0) - ux) * maxCos))
	# t_1 = sqrt(1 + t_0 * (ux * (maxCos * (ux - 1)))), algebraically sqrt(1 - t_0^2)
	t_1 = sqrt(Float32(Float32(1.0) + Float32(t_0 * Float32(ux * Float32(maxCos * Float32(ux + Float32(-1.0)))))))
	return Float32(Float32(t_0 * zi) + Float32(Float32(xi * Float32(t_1 * cos(log(Float32(Float32(1.0) + expm1(Float32(uy * Float32(Float32(2.0) * Float32(pi))))))))) + Float32(yi * Float32(t_1 * sin(Float32(Float32(uy * Float32(2.0)) * Float32(pi)))))))
end
\begin{array}{l}

\\
\begin{array}{l}
t_0 := ux \cdot \left(\left(1 - ux\right) \cdot maxCos\right)\\
t_1 := \sqrt{1 + t_0 \cdot \left(ux \cdot \left(maxCos \cdot \left(ux + -1\right)\right)\right)}\\
t_0 \cdot zi + \left(xi \cdot \left(t_1 \cdot \cos \log \left(1 + \mathsf{expm1}\left(uy \cdot \left(2 \cdot \pi\right)\right)\right)\right) + yi \cdot \left(t_1 \cdot \sin \left(\left(uy \cdot 2\right) \cdot \pi\right)\right)\right)
\end{array}
\end{array}
Derivation
    &prev;&pcontext;&pcontext2;&ctx;
  1. Add Preprocessing

Alternative 3: 99.0% accurate, 0.7× speedup?

\[\begin{array}{l} \\ \begin{array}{l} t_0 := \left(1 - ux\right) \cdot \left(ux \cdot maxCos\right)\\ t_1 := uy \cdot \left(2 \cdot \pi\right)\\ \mathsf{fma}\left(\cos t_1, xi \cdot \sqrt{1 + \left(\left(ux \cdot maxCos\right) \cdot t_0\right) \cdot \left(ux + -1\right)}, \left(1 - ux\right) \cdot \left(ux \cdot \left(maxCos \cdot zi\right)\right) + \sqrt{1 - {t_0}^{2}} \cdot \left(yi \cdot \sin t_1\right)\right) \end{array} \end{array} \]
; Alternative 3 (binary32), built around a fused multiply-add.
;   t_0 = (1 - ux) * (ux * maxCos)
;   t_1 = uy * (2 * PI)
; Result: fma(cos(t_1), xi * sqrt(1 + ((ux * maxCos) * t_0) * (ux + -1)),
;             (1 - ux) * (ux * (maxCos * zi)) + sqrt(1 - t_0^2) * (yi * sin(t_1)))
; Both square-root radicands are algebraically 1 - t_0^2.
(FPCore (xi yi zi ux uy maxCos)
 :precision binary32
 (let* ((t_0 (* (- 1.0 ux) (* ux maxCos))) (t_1 (* uy (* 2.0 PI))))
   (fma
    (cos t_1)
    (* xi (sqrt (+ 1.0 (* (* (* ux maxCos) t_0) (+ ux -1.0)))))
    (+
     (* (- 1.0 ux) (* ux (* maxCos zi)))
     (* (sqrt (- 1.0 (pow t_0 2.0))) (* yi (sin t_1)))))))
/*
 * Alternative 3 in C (float arithmetic).
 * The xi term is combined with the rest through fmaf; the zi and yi terms
 * are summed as fmaf's addend. Both sqrtf radicands are algebraically 1 - t_0^2.
 */
float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
	float t_0 = (1.0f - ux) * (ux * maxCos);     /* (1 - ux) * ux * maxCos */
	float t_1 = uy * (2.0f * ((float) M_PI));    /* angle uy * (2 * pi) */
	return fmaf(cosf(t_1), (xi * sqrtf((1.0f + (((ux * maxCos) * t_0) * (ux + -1.0f))))), (((1.0f - ux) * (ux * (maxCos * zi))) + (sqrtf((1.0f - powf(t_0, 2.0f))) * (yi * sinf(t_1)))));
}
# Alternative 3 in Julia; constants and intermediates are wrapped in Float32(...).
# The xi term is combined with the zi and yi terms through fma.
# Both sqrt radicands are algebraically 1 - t_0^2.
function code(xi, yi, zi, ux, uy, maxCos)
	# t_0 = (1 - ux) * (ux * maxCos)
	t_0 = Float32(Float32(Float32(1.0) - ux) * Float32(ux * maxCos))
	# t_1 = uy * (2 * pi)
	t_1 = Float32(uy * Float32(Float32(2.0) * Float32(pi)))
	return fma(cos(t_1), Float32(xi * sqrt(Float32(Float32(1.0) + Float32(Float32(Float32(ux * maxCos) * t_0) * Float32(ux + Float32(-1.0)))))), Float32(Float32(Float32(Float32(1.0) - ux) * Float32(ux * Float32(maxCos * zi))) + Float32(sqrt(Float32(Float32(1.0) - (t_0 ^ Float32(2.0)))) * Float32(yi * sin(t_1)))))
end
\begin{array}{l}

\\
\begin{array}{l}
t_0 := \left(1 - ux\right) \cdot \left(ux \cdot maxCos\right)\\
t_1 := uy \cdot \left(2 \cdot \pi\right)\\
\mathsf{fma}\left(\cos t_1, xi \cdot \sqrt{1 + \left(\left(ux \cdot maxCos\right) \cdot t_0\right) \cdot \left(ux + -1\right)}, \left(1 - ux\right) \cdot \left(ux \cdot \left(maxCos \cdot zi\right)\right) + \sqrt{1 - {t_0}^{2}} \cdot \left(yi \cdot \sin t_1\right)\right)
\end{array}
\end{array}
Derivation
    &prev;&pcontext;&pcontext2;&ctx;
  1. Add Preprocessing

Alternative 4: 98.8% accurate, 0.9× speedup?

\[\begin{array}{l} \\ \mathsf{fma}\left(\cos \left(uy \cdot \left(2 \cdot \pi\right)\right), xi \cdot \sqrt{1 + \left(\left(ux \cdot maxCos\right) \cdot \left(\left(1 - ux\right) \cdot \left(ux \cdot maxCos\right)\right)\right) \cdot \left(ux + -1\right)}, \mathsf{fma}\left(maxCos, ux \cdot \left(\left(1 - ux\right) \cdot zi\right), yi \cdot \sin \left(\left(uy \cdot 2\right) \cdot \pi\right)\right)\right) \end{array} \]
; Alternative 4 (binary32), two nested fma operations.
; Outer: fma(cos(uy * (2 * PI)), xi * sqrt(...), inner)
; Inner: fma(maxCos, ux * ((1 - ux) * zi), yi * sin((uy * 2) * PI))
; Note: the yi * sin term is not multiplied by the square-root factor here.
(FPCore (xi yi zi ux uy maxCos)
 :precision binary32
 (fma
  (cos (* uy (* 2.0 PI)))
  (*
   xi
   (sqrt
    (+ 1.0 (* (* (* ux maxCos) (* (- 1.0 ux) (* ux maxCos))) (+ ux -1.0)))))
  (fma maxCos (* ux (* (- 1.0 ux) zi)) (* yi (sin (* (* uy 2.0) PI))))))
/*
 * Alternative 4 in C (float arithmetic), two nested fmaf calls:
 *   fmaf(cos(uy * (2 * pi)), xi * sqrt(...), fmaf(maxCos, ux * ((1 - ux) * zi), yi * sin((uy * 2) * pi)))
 * The yi * sin term is not multiplied by the square-root factor here.
 */
float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
	return fmaf(cosf((uy * (2.0f * ((float) M_PI)))), (xi * sqrtf((1.0f + (((ux * maxCos) * ((1.0f - ux) * (ux * maxCos))) * (ux + -1.0f))))), fmaf(maxCos, (ux * ((1.0f - ux) * zi)), (yi * sinf(((uy * 2.0f) * ((float) M_PI))))));
}
# Alternative 4 in Julia; constants and intermediates are wrapped in Float32(...).
# Two nested fma calls:
#   fma(cos(uy * (2 * pi)), xi * sqrt(...), fma(maxCos, ux * ((1 - ux) * zi), yi * sin((uy * 2) * pi)))
# The yi * sin term is not multiplied by the square-root factor here.
function code(xi, yi, zi, ux, uy, maxCos)
	return fma(cos(Float32(uy * Float32(Float32(2.0) * Float32(pi)))), Float32(xi * sqrt(Float32(Float32(1.0) + Float32(Float32(Float32(ux * maxCos) * Float32(Float32(Float32(1.0) - ux) * Float32(ux * maxCos))) * Float32(ux + Float32(-1.0)))))), fma(maxCos, Float32(ux * Float32(Float32(Float32(1.0) - ux) * zi)), Float32(yi * sin(Float32(Float32(uy * Float32(2.0)) * Float32(pi))))))
end
\begin{array}{l}

\\
\mathsf{fma}\left(\cos \left(uy \cdot \left(2 \cdot \pi\right)\right), xi \cdot \sqrt{1 + \left(\left(ux \cdot maxCos\right) \cdot \left(\left(1 - ux\right) \cdot \left(ux \cdot maxCos\right)\right)\right) \cdot \left(ux + -1\right)}, \mathsf{fma}\left(maxCos, ux \cdot \left(\left(1 - ux\right) \cdot zi\right), yi \cdot \sin \left(\left(uy \cdot 2\right) \cdot \pi\right)\right)\right)
\end{array}
Derivation
    &prev;&pcontext;&pcontext2;&ctx;
  1. Add Preprocessing

Alternative 5: 98.9% accurate, 1.0× speedup?

\[\begin{array}{l} \\ \begin{array}{l} t_0 := \left(uy \cdot 2\right) \cdot \pi\\ t_1 := ux \cdot \left(\left(1 - ux\right) \cdot maxCos\right)\\ t_2 := \sqrt{1 + t_1 \cdot \left(ux \cdot \left(maxCos \cdot \left(ux + -1\right)\right)\right)}\\ t_1 \cdot zi + \left(\left(\cos t_0 \cdot t_2\right) \cdot xi + yi \cdot \left(t_2 \cdot \sin t_0\right)\right) \end{array} \end{array} \]
; Alternative 5 (binary32).
;   t_0 = (uy * 2) * PI
;   t_1 = ux * ((1 - ux) * maxCos)
;   t_2 = sqrt(1 + t_1 * (ux * (maxCos * (ux + -1))))   ; algebraically sqrt(1 - t_1^2)
; Result: t_1 * zi + ((cos(t_0) * t_2) * xi + yi * (t_2 * sin(t_0)))
(FPCore (xi yi zi ux uy maxCos)
 :precision binary32
 (let* ((t_0 (* (* uy 2.0) PI))
        (t_1 (* ux (* (- 1.0 ux) maxCos)))
        (t_2 (sqrt (+ 1.0 (* t_1 (* ux (* maxCos (+ ux -1.0))))))))
   (+ (* t_1 zi) (+ (* (* (cos t_0) t_2) xi) (* yi (* t_2 (sin t_0)))))))
/*
 * Alternative 5 in C (float arithmetic).
 * Returns t_1 * zi + ((cos(t_0) * t_2) * xi + yi * (t_2 * sin(t_0))).
 */
float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
	float t_0 = (uy * 2.0f) * ((float) M_PI);                             /* angle (uy * 2) * pi */
	float t_1 = ux * ((1.0f - ux) * maxCos);                              /* coefficient applied to zi */
	float t_2 = sqrtf((1.0f + (t_1 * (ux * (maxCos * (ux + -1.0f))))));  /* algebraically sqrt(1 - t_1^2) */
	return (t_1 * zi) + (((cosf(t_0) * t_2) * xi) + (yi * (t_2 * sinf(t_0))));
}
# Alternative 5 in Julia; constants and intermediates are wrapped in Float32(...).
# Returns t_1 * zi + ((cos(t_0) * t_2) * xi + yi * (t_2 * sin(t_0))).
function code(xi, yi, zi, ux, uy, maxCos)
	# t_0 = (uy * 2) * pi
	t_0 = Float32(Float32(uy * Float32(2.0)) * Float32(pi))
	# t_1 = ux * ((1 - ux) * maxCos)
	t_1 = Float32(ux * Float32(Float32(Float32(1.0) - ux) * maxCos))
	# t_2 = sqrt(1 + t_1 * (ux * (maxCos * (ux - 1)))), algebraically sqrt(1 - t_1^2)
	t_2 = sqrt(Float32(Float32(1.0) + Float32(t_1 * Float32(ux * Float32(maxCos * Float32(ux + Float32(-1.0)))))))
	return Float32(Float32(t_1 * zi) + Float32(Float32(Float32(cos(t_0) * t_2) * xi) + Float32(yi * Float32(t_2 * sin(t_0)))))
end
% Alternative 5 in MATLAB.
% Literal constants are created with single(...); assumes the inputs are
% also single - TODO confirm.
% Returns tmp = t_1 * zi + ((cos(t_0) * t_2) * xi + yi * (t_2 * sin(t_0))).
function tmp = code(xi, yi, zi, ux, uy, maxCos)
	t_0 = (uy * single(2.0)) * single(pi);                                      % angle (uy * 2) * pi
	t_1 = ux * ((single(1.0) - ux) * maxCos);                                   % coefficient applied to zi
	t_2 = sqrt((single(1.0) + (t_1 * (ux * (maxCos * (ux + single(-1.0)))))));  % algebraically sqrt(1 - t_1^2)
	tmp = (t_1 * zi) + (((cos(t_0) * t_2) * xi) + (yi * (t_2 * sin(t_0))));
end
\begin{array}{l}

\\
\begin{array}{l}
t_0 := \left(uy \cdot 2\right) \cdot \pi\\
t_1 := ux \cdot \left(\left(1 - ux\right) \cdot maxCos\right)\\
t_2 := \sqrt{1 + t_1 \cdot \left(ux \cdot \left(maxCos \cdot \left(ux + -1\right)\right)\right)}\\
t_1 \cdot zi + \left(\left(\cos t_0 \cdot t_2\right) \cdot xi + yi \cdot \left(t_2 \cdot \sin t_0\right)\right)
\end{array}
\end{array}
Derivation
    &prev;&pcontext;&pcontext2;&ctx;
  1. Add Preprocessing

Alternative 6: 98.7% accurate, 1.1× speedup?

\[\begin{array}{l} \\ \begin{array}{l} t_0 := uy \cdot \left(2 \cdot \pi\right)\\ \mathsf{fma}\left(\left(1 - ux\right) \cdot maxCos, ux \cdot zi, \sqrt{1 - \left(ux \cdot maxCos\right) \cdot \left(ux \cdot maxCos\right)} \cdot \left(xi \cdot \cos t_0 + yi \cdot \sin t_0\right)\right) \end{array} \end{array} \]
; Alternative 6 (binary32).
;   t_0 = uy * (2 * PI)
; Result: fma((1 - ux) * maxCos, ux * zi,
;             sqrt(1 - (ux * maxCos)^2) * (xi * cos(t_0) + yi * sin(t_0)))
; Note: the radicand uses (ux * maxCos)^2 with no (1 - ux) factor.
(FPCore (xi yi zi ux uy maxCos)
 :precision binary32
 (let* ((t_0 (* uy (* 2.0 PI))))
   (fma
    (* (- 1.0 ux) maxCos)
    (* ux zi)
    (*
     (sqrt (- 1.0 (* (* ux maxCos) (* ux maxCos))))
     (+ (* xi (cos t_0)) (* yi (sin t_0)))))))
/*
 * Alternative 6 in C (float arithmetic).
 * Returns fmaf((1 - ux) * maxCos, ux * zi,
 *              sqrt(1 - (ux * maxCos)^2) * (xi * cos(t_0) + yi * sin(t_0))).
 * The radicand uses (ux * maxCos)^2 with no (1 - ux) factor.
 */
float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
	float t_0 = uy * (2.0f * ((float) M_PI));    /* angle uy * (2 * pi) */
	return fmaf(((1.0f - ux) * maxCos), (ux * zi), (sqrtf((1.0f - ((ux * maxCos) * (ux * maxCos)))) * ((xi * cosf(t_0)) + (yi * sinf(t_0)))));
}
# Alternative 6 in Julia; constants and intermediates are wrapped in Float32(...).
# Returns fma((1 - ux) * maxCos, ux * zi,
#             sqrt(1 - (ux * maxCos)^2) * (xi * cos(t_0) + yi * sin(t_0))).
# The radicand uses (ux * maxCos)^2 with no (1 - ux) factor.
function code(xi, yi, zi, ux, uy, maxCos)
	# t_0 = uy * (2 * pi)
	t_0 = Float32(uy * Float32(Float32(2.0) * Float32(pi)))
	return fma(Float32(Float32(Float32(1.0) - ux) * maxCos), Float32(ux * zi), Float32(sqrt(Float32(Float32(1.0) - Float32(Float32(ux * maxCos) * Float32(ux * maxCos)))) * Float32(Float32(xi * cos(t_0)) + Float32(yi * sin(t_0)))))
end
\begin{array}{l}

\\
\begin{array}{l}
t_0 := uy \cdot \left(2 \cdot \pi\right)\\
\mathsf{fma}\left(\left(1 - ux\right) \cdot maxCos, ux \cdot zi, \sqrt{1 - \left(ux \cdot maxCos\right) \cdot \left(ux \cdot maxCos\right)} \cdot \left(xi \cdot \cos t_0 + yi \cdot \sin t_0\right)\right)
\end{array}
\end{array}
Derivation
    &prev;&pcontext;&pcontext2;&ctx;
  1. Add Preprocessing

Alternative 7: 98.8% accurate, 1.3× speedup?

\[\begin{array}{l} \\ \begin{array}{l} t_0 := \left(uy \cdot 2\right) \cdot \pi\\ t_1 := ux \cdot \left(\left(1 - ux\right) \cdot maxCos\right)\\ t_1 \cdot zi + \left(\left(\cos t_0 \cdot \sqrt{1 + t_1 \cdot \left(ux \cdot \left(maxCos \cdot \left(ux + -1\right)\right)\right)}\right) \cdot xi + yi \cdot \sin t_0\right) \end{array} \end{array} \]
; Alternative 7 (binary32).
;   t_0 = (uy * 2) * PI
;   t_1 = ux * ((1 - ux) * maxCos)
; Result: t_1 * zi + ((cos(t_0) * sqrt(1 + t_1 * (ux * (maxCos * (ux + -1))))) * xi
;                     + yi * sin(t_0))
; Note: the yi * sin term is not multiplied by the square-root factor here.
(FPCore (xi yi zi ux uy maxCos)
 :precision binary32
 (let* ((t_0 (* (* uy 2.0) PI)) (t_1 (* ux (* (- 1.0 ux) maxCos))))
   (+
    (* t_1 zi)
    (+
     (* (* (cos t_0) (sqrt (+ 1.0 (* t_1 (* ux (* maxCos (+ ux -1.0))))))) xi)
     (* yi (sin t_0))))))
/*
 * Alternative 7 in C (float arithmetic).
 * The square-root factor is applied only to the xi * cos term; the
 * yi * sin term is added without it.
 */
float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
	float t_0 = (uy * 2.0f) * ((float) M_PI);    /* angle (uy * 2) * pi */
	float t_1 = ux * ((1.0f - ux) * maxCos);     /* coefficient applied to zi */
	return (t_1 * zi) + (((cosf(t_0) * sqrtf((1.0f + (t_1 * (ux * (maxCos * (ux + -1.0f))))))) * xi) + (yi * sinf(t_0)));
}
# Alternative 7 in Julia; constants and intermediates are wrapped in Float32(...).
# The square-root factor is applied only to the xi * cos term; the
# yi * sin term is added without it.
function code(xi, yi, zi, ux, uy, maxCos)
	# t_0 = (uy * 2) * pi
	t_0 = Float32(Float32(uy * Float32(2.0)) * Float32(pi))
	# t_1 = ux * ((1 - ux) * maxCos)
	t_1 = Float32(ux * Float32(Float32(Float32(1.0) - ux) * maxCos))
	return Float32(Float32(t_1 * zi) + Float32(Float32(Float32(cos(t_0) * sqrt(Float32(Float32(1.0) + Float32(t_1 * Float32(ux * Float32(maxCos * Float32(ux + Float32(-1.0)))))))) * xi) + Float32(yi * sin(t_0))))
end
% Alternative 7 in MATLAB.
% Literal constants are created with single(...); assumes the inputs are
% also single - TODO confirm.
% The square-root factor is applied only to the xi * cos term; the
% yi * sin term is added without it.
function tmp = code(xi, yi, zi, ux, uy, maxCos)
	t_0 = (uy * single(2.0)) * single(pi);      % angle (uy * 2) * pi
	t_1 = ux * ((single(1.0) - ux) * maxCos);   % coefficient applied to zi
	tmp = (t_1 * zi) + (((cos(t_0) * sqrt((single(1.0) + (t_1 * (ux * (maxCos * (ux + single(-1.0)))))))) * xi) + (yi * sin(t_0)));
end
\begin{array}{l}

\\
\begin{array}{l}
t_0 := \left(uy \cdot 2\right) \cdot \pi\\
t_1 := ux \cdot \left(\left(1 - ux\right) \cdot maxCos\right)\\
t_1 \cdot zi + \left(\left(\cos t_0 \cdot \sqrt{1 + t_1 \cdot \left(ux \cdot \left(maxCos \cdot \left(ux + -1\right)\right)\right)}\right) \cdot xi + yi \cdot \sin t_0\right)
\end{array}
\end{array}
Derivation
    &prev;&pcontext;&pcontext2;&ctx;
  1. Add Preprocessing

Alternative 8: 85.3% accurate, 1.4× speedup?

\[\begin{array}{l} \\ \begin{array}{l} t_0 := \left(ux \cdot maxCos\right) \cdot \left(ux \cdot maxCos\right)\\ \mathbf{if}\;uy \leq 0.014000000432133675:\\ \;\;\;\;\mathsf{fma}\left(\left(1 - ux\right) \cdot maxCos, ux \cdot zi, \sqrt{1 + t_0 \cdot \left(\left(1 - ux\right) \cdot \left(ux + -1\right)\right)} \cdot \left(xi + \left(uy \cdot 2\right) \cdot \left(\pi \cdot yi\right)\right)\right)\\ \mathbf{else}:\\ \;\;\;\;\mathsf{fma}\left(\cos \left(uy \cdot \left(2 \cdot \pi\right)\right), xi \cdot \sqrt{1 + t_0 \cdot \left(ux + -1\right)}, maxCos \cdot \left(ux \cdot \left(\left(1 - ux\right) \cdot zi\right)\right)\right)\\ \end{array} \end{array} \]
; Alternative 8 (binary32): two regimes selected by uy.
;   t_0 = (ux * maxCos) * (ux * maxCos)
; uy <= 0.014000000432133675:
;   fma((1 - ux) * maxCos, ux * zi,
;       sqrt(1 + t_0 * ((1 - ux) * (ux + -1))) * (xi + (uy * 2) * (PI * yi)))
;   (no cos/sin calls in this branch)
; otherwise:
;   fma(cos(uy * (2 * PI)), xi * sqrt(1 + t_0 * (ux + -1)), maxCos * (ux * ((1 - ux) * zi)))
;   (yi does not appear in this branch)
(FPCore (xi yi zi ux uy maxCos)
 :precision binary32
 (let* ((t_0 (* (* ux maxCos) (* ux maxCos))))
   (if (<= uy 0.014000000432133675)
     (fma
      (* (- 1.0 ux) maxCos)
      (* ux zi)
      (*
       (sqrt (+ 1.0 (* t_0 (* (- 1.0 ux) (+ ux -1.0)))))
       (+ xi (* (* uy 2.0) (* PI yi)))))
     (fma
      (cos (* uy (* 2.0 PI)))
      (* xi (sqrt (+ 1.0 (* t_0 (+ ux -1.0)))))
      (* maxCos (* ux (* (- 1.0 ux) zi)))))))
/*
 * Alternative 8 in C (float arithmetic): two regimes selected by uy.
 */
float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
	float t_0 = (ux * maxCos) * (ux * maxCos);   /* (ux * maxCos)^2 */
	float tmp;
	if (uy <= 0.014000000432133675f) {
		/* Small-uy branch: no cos/sin calls; uses xi + (uy * 2) * (pi * yi). */
		tmp = fmaf(((1.0f - ux) * maxCos), (ux * zi), (sqrtf((1.0f + (t_0 * ((1.0f - ux) * (ux + -1.0f))))) * (xi + ((uy * 2.0f) * (((float) M_PI) * yi)))));
	} else {
		/* Other branch: cos-weighted xi term plus the zi term; yi does not appear. */
		tmp = fmaf(cosf((uy * (2.0f * ((float) M_PI)))), (xi * sqrtf((1.0f + (t_0 * (ux + -1.0f))))), (maxCos * (ux * ((1.0f - ux) * zi))));
	}
	return tmp;
}
# Alternative 8 in Julia; constants and intermediates are wrapped in Float32(...).
# Two regimes selected by uy.
function code(xi, yi, zi, ux, uy, maxCos)
	# t_0 = (ux * maxCos)^2
	t_0 = Float32(Float32(ux * maxCos) * Float32(ux * maxCos))
	tmp = Float32(0.0)
	if (uy <= Float32(0.014000000432133675))
		# Small-uy branch: no cos/sin calls; uses xi + (uy * 2) * (pi * yi).
		tmp = fma(Float32(Float32(Float32(1.0) - ux) * maxCos), Float32(ux * zi), Float32(sqrt(Float32(Float32(1.0) + Float32(t_0 * Float32(Float32(Float32(1.0) - ux) * Float32(ux + Float32(-1.0)))))) * Float32(xi + Float32(Float32(uy * Float32(2.0)) * Float32(Float32(pi) * yi)))));
	else
		# Other branch: cos-weighted xi term plus the zi term; yi does not appear.
		tmp = fma(cos(Float32(uy * Float32(Float32(2.0) * Float32(pi)))), Float32(xi * sqrt(Float32(Float32(1.0) + Float32(t_0 * Float32(ux + Float32(-1.0)))))), Float32(maxCos * Float32(ux * Float32(Float32(Float32(1.0) - ux) * zi))));
	end
	return tmp
end
\begin{array}{l}

\\
\begin{array}{l}
t_0 := \left(ux \cdot maxCos\right) \cdot \left(ux \cdot maxCos\right)\\
\mathbf{if}\;uy \leq 0.014000000432133675:\\
\;\;\;\;\mathsf{fma}\left(\left(1 - ux\right) \cdot maxCos, ux \cdot zi, \sqrt{1 + t_0 \cdot \left(\left(1 - ux\right) \cdot \left(ux + -1\right)\right)} \cdot \left(xi + \left(uy \cdot 2\right) \cdot \left(\pi \cdot yi\right)\right)\right)\\

\mathbf{else}:\\
\;\;\;\;\mathsf{fma}\left(\cos \left(uy \cdot \left(2 \cdot \pi\right)\right), xi \cdot \sqrt{1 + t_0 \cdot \left(ux + -1\right)}, maxCos \cdot \left(ux \cdot \left(\left(1 - ux\right) \cdot zi\right)\right)\right)\\


\end{array}
\end{array}
Derivation
    &prev;&pcontext;&pcontext2;&ctx;
  1. Add Preprocessing

Alternative 9: 85.3% accurate, 1.4× speedup?

\[\begin{array}{l} \\ \begin{array}{l} t_0 := \left(ux \cdot maxCos\right) \cdot \left(ux \cdot maxCos\right)\\ \mathbf{if}\;uy \leq 0.019999999552965164:\\ \;\;\;\;\mathsf{fma}\left(\left(1 - ux\right) \cdot maxCos, ux \cdot zi, \sqrt{1 + t_0 \cdot \left(\left(1 - ux\right) \cdot \left(ux + -1\right)\right)} \cdot \left(xi + \left(uy \cdot 2\right) \cdot \left(\pi \cdot yi\right)\right)\right)\\ \mathbf{else}:\\ \;\;\;\;\mathsf{fma}\left(\cos \left(uy \cdot \left(2 \cdot \pi\right)\right), xi \cdot \sqrt{1 + t_0 \cdot \left(ux + -1\right)}, maxCos \cdot \left(ux \cdot \left(zi - ux \cdot zi\right)\right)\right)\\ \end{array} \end{array} \]
; Alternative 9 (binary32): two regimes selected by uy.
;   t_0 = (ux * maxCos) * (ux * maxCos)
; uy <= 0.019999999552965164:
;   fma((1 - ux) * maxCos, ux * zi,
;       sqrt(1 + t_0 * ((1 - ux) * (ux + -1))) * (xi + (uy * 2) * (PI * yi)))
;   (no cos/sin calls in this branch)
; otherwise:
;   fma(cos(uy * (2 * PI)), xi * sqrt(1 + t_0 * (ux + -1)), maxCos * (ux * (zi - ux * zi)))
;   (yi does not appear in this branch)
(FPCore (xi yi zi ux uy maxCos)
 :precision binary32
 (let* ((t_0 (* (* ux maxCos) (* ux maxCos))))
   (if (<= uy 0.019999999552965164)
     (fma
      (* (- 1.0 ux) maxCos)
      (* ux zi)
      (*
       (sqrt (+ 1.0 (* t_0 (* (- 1.0 ux) (+ ux -1.0)))))
       (+ xi (* (* uy 2.0) (* PI yi)))))
     (fma
      (cos (* uy (* 2.0 PI)))
      (* xi (sqrt (+ 1.0 (* t_0 (+ ux -1.0)))))
      (* maxCos (* ux (- zi (* ux zi))))))))
/*
 * Alternative 9 in C (float arithmetic): two regimes selected by uy.
 */
float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
	float t_0 = (ux * maxCos) * (ux * maxCos);   /* (ux * maxCos)^2 */
	float tmp;
	if (uy <= 0.019999999552965164f) {
		/* Small-uy branch: no cos/sin calls; uses xi + (uy * 2) * (pi * yi). */
		tmp = fmaf(((1.0f - ux) * maxCos), (ux * zi), (sqrtf((1.0f + (t_0 * ((1.0f - ux) * (ux + -1.0f))))) * (xi + ((uy * 2.0f) * (((float) M_PI) * yi)))));
	} else {
		/* Other branch: zi term written as maxCos * (ux * (zi - ux * zi)); yi does not appear. */
		tmp = fmaf(cosf((uy * (2.0f * ((float) M_PI)))), (xi * sqrtf((1.0f + (t_0 * (ux + -1.0f))))), (maxCos * (ux * (zi - (ux * zi)))));
	}
	return tmp;
}
# Alternative 9 in Julia; constants and intermediates are wrapped in Float32(...).
# Two regimes selected by uy.
function code(xi, yi, zi, ux, uy, maxCos)
	# t_0 = (ux * maxCos)^2
	t_0 = Float32(Float32(ux * maxCos) * Float32(ux * maxCos))
	tmp = Float32(0.0)
	if (uy <= Float32(0.019999999552965164))
		# Small-uy branch: no cos/sin calls; uses xi + (uy * 2) * (pi * yi).
		tmp = fma(Float32(Float32(Float32(1.0) - ux) * maxCos), Float32(ux * zi), Float32(sqrt(Float32(Float32(1.0) + Float32(t_0 * Float32(Float32(Float32(1.0) - ux) * Float32(ux + Float32(-1.0)))))) * Float32(xi + Float32(Float32(uy * Float32(2.0)) * Float32(Float32(pi) * yi)))));
	else
		# Other branch: zi term written as maxCos * (ux * (zi - ux * zi)); yi does not appear.
		tmp = fma(cos(Float32(uy * Float32(Float32(2.0) * Float32(pi)))), Float32(xi * sqrt(Float32(Float32(1.0) + Float32(t_0 * Float32(ux + Float32(-1.0)))))), Float32(maxCos * Float32(ux * Float32(zi - Float32(ux * zi)))));
	end
	return tmp
end
\begin{array}{l}

\\
\begin{array}{l}
t_0 := \left(ux \cdot maxCos\right) \cdot \left(ux \cdot maxCos\right)\\
\mathbf{if}\;uy \leq 0.019999999552965164:\\
\;\;\;\;\mathsf{fma}\left(\left(1 - ux\right) \cdot maxCos, ux \cdot zi, \sqrt{1 + t_0 \cdot \left(\left(1 - ux\right) \cdot \left(ux + -1\right)\right)} \cdot \left(xi + \left(uy \cdot 2\right) \cdot \left(\pi \cdot yi\right)\right)\right)\\

\mathbf{else}:\\
\;\;\;\;\mathsf{fma}\left(\cos \left(uy \cdot \left(2 \cdot \pi\right)\right), xi \cdot \sqrt{1 + t_0 \cdot \left(ux + -1\right)}, maxCos \cdot \left(ux \cdot \left(zi - ux \cdot zi\right)\right)\right)\\


\end{array}
\end{array}
Derivation
    &prev;&pcontext;&pcontext2;&ctx;
  1. Add Preprocessing

Alternative 10: 90.2% accurate, 1.4× speedup?

\[\begin{array}{l} \\ \mathsf{fma}\left(\left(1 - ux\right) \cdot maxCos, ux \cdot zi, \sqrt{1 - \left(ux \cdot maxCos\right) \cdot \left(ux \cdot maxCos\right)} \cdot \left(xi \cdot \cos \left(uy \cdot \left(2 \cdot \pi\right)\right) + \left(uy \cdot 2\right) \cdot \left(\pi \cdot yi\right)\right)\right) \end{array} \]
; Alternative 10 (binary32).
; Result: fma((1 - ux) * maxCos, ux * zi,
;             sqrt(1 - (ux * maxCos)^2) * (xi * cos(uy * (2 * PI)) + (uy * 2) * (PI * yi)))
; Note: the yi term uses (uy * 2) * (PI * yi) with no sin call.
(FPCore (xi yi zi ux uy maxCos)
 :precision binary32
 (fma
  (* (- 1.0 ux) maxCos)
  (* ux zi)
  (*
   (sqrt (- 1.0 (* (* ux maxCos) (* ux maxCos))))
   (+ (* xi (cos (* uy (* 2.0 PI)))) (* (* uy 2.0) (* PI yi))))))
/*
 * Alternative 10 in C (float arithmetic).
 * Returns fmaf((1 - ux) * maxCos, ux * zi,
 *              sqrt(1 - (ux * maxCos)^2) * (xi * cos(uy * (2 * pi)) + (uy * 2) * (pi * yi))).
 * The yi term has no sinf call.
 */
float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
	return fmaf(((1.0f - ux) * maxCos), (ux * zi), (sqrtf((1.0f - ((ux * maxCos) * (ux * maxCos)))) * ((xi * cosf((uy * (2.0f * ((float) M_PI))))) + ((uy * 2.0f) * (((float) M_PI) * yi)))));
}
# Alternative 10 in Julia; constants and intermediates are wrapped in Float32(...).
# Returns fma((1 - ux) * maxCos, ux * zi,
#             sqrt(1 - (ux * maxCos)^2) * (xi * cos(uy * (2 * pi)) + (uy * 2) * (pi * yi))).
# The yi term has no sin call.
function code(xi, yi, zi, ux, uy, maxCos)
	return fma(Float32(Float32(Float32(1.0) - ux) * maxCos), Float32(ux * zi), Float32(sqrt(Float32(Float32(1.0) - Float32(Float32(ux * maxCos) * Float32(ux * maxCos)))) * Float32(Float32(xi * cos(Float32(uy * Float32(Float32(2.0) * Float32(pi))))) + Float32(Float32(uy * Float32(2.0)) * Float32(Float32(pi) * yi)))))
end
\begin{array}{l}

\\
\mathsf{fma}\left(\left(1 - ux\right) \cdot maxCos, ux \cdot zi, \sqrt{1 - \left(ux \cdot maxCos\right) \cdot \left(ux \cdot maxCos\right)} \cdot \left(xi \cdot \cos \left(uy \cdot \left(2 \cdot \pi\right)\right) + \left(uy \cdot 2\right) \cdot \left(\pi \cdot yi\right)\right)\right)
\end{array}
Derivation
    &prev;&pcontext;&pcontext2;&ctx;
  1. Add Preprocessing

Alternative 11: 85.0% accurate, 1.4× speedup?

\[\begin{array}{l} \\ \begin{array}{l} t_0 := \left(ux \cdot maxCos\right) \cdot \left(ux \cdot maxCos\right)\\ \mathbf{if}\;uy \leq 0.019999999552965164:\\ \;\;\;\;\mathsf{fma}\left(\left(1 - ux\right) \cdot maxCos, ux \cdot zi, \sqrt{1 + t_0 \cdot \left(\left(1 - ux\right) \cdot \left(ux + -1\right)\right)} \cdot \left(xi + \left(uy \cdot 2\right) \cdot \left(\pi \cdot yi\right)\right)\right)\\ \mathbf{else}:\\ \;\;\;\;\mathsf{fma}\left(\cos \left(uy \cdot \left(2 \cdot \pi\right)\right), xi \cdot \sqrt{1 + t_0 \cdot \left(ux + -1\right)}, maxCos \cdot \left(ux \cdot zi\right)\right)\\ \end{array} \end{array} \]
; Alternative 11 (binary32): two regimes selected by uy.
;   t_0 = (ux * maxCos) * (ux * maxCos)
; uy <= 0.019999999552965164:
;   fma((1 - ux) * maxCos, ux * zi,
;       sqrt(1 + t_0 * ((1 - ux) * (ux + -1))) * (xi + (uy * 2) * (PI * yi)))
;   (no cos/sin calls in this branch)
; otherwise:
;   fma(cos(uy * (2 * PI)), xi * sqrt(1 + t_0 * (ux + -1)), maxCos * (ux * zi))
;   (yi does not appear in this branch, and the zi term has no (1 - ux) factor)
(FPCore (xi yi zi ux uy maxCos)
 :precision binary32
 (let* ((t_0 (* (* ux maxCos) (* ux maxCos))))
   (if (<= uy 0.019999999552965164)
     (fma
      (* (- 1.0 ux) maxCos)
      (* ux zi)
      (*
       (sqrt (+ 1.0 (* t_0 (* (- 1.0 ux) (+ ux -1.0)))))
       (+ xi (* (* uy 2.0) (* PI yi)))))
     (fma
      (cos (* uy (* 2.0 PI)))
      (* xi (sqrt (+ 1.0 (* t_0 (+ ux -1.0)))))
      (* maxCos (* ux zi))))))
/*
 * Alternative 11 in C (float arithmetic): two regimes selected by uy.
 */
float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
	float t_0 = (ux * maxCos) * (ux * maxCos);   /* (ux * maxCos)^2 */
	float tmp;
	if (uy <= 0.019999999552965164f) {
		/* Small-uy branch: no cos/sin calls; uses xi + (uy * 2) * (pi * yi). */
		tmp = fmaf(((1.0f - ux) * maxCos), (ux * zi), (sqrtf((1.0f + (t_0 * ((1.0f - ux) * (ux + -1.0f))))) * (xi + ((uy * 2.0f) * (((float) M_PI) * yi)))));
	} else {
		/* Other branch: zi term is maxCos * (ux * zi), with no (1 - ux) factor; yi does not appear. */
		tmp = fmaf(cosf((uy * (2.0f * ((float) M_PI)))), (xi * sqrtf((1.0f + (t_0 * (ux + -1.0f))))), (maxCos * (ux * zi)));
	}
	return tmp;
}
# Alternative 11 in Julia; constants and intermediates are wrapped in Float32(...).
# Two regimes selected by uy.
function code(xi, yi, zi, ux, uy, maxCos)
	# t_0 = (ux * maxCos)^2
	t_0 = Float32(Float32(ux * maxCos) * Float32(ux * maxCos))
	tmp = Float32(0.0)
	if (uy <= Float32(0.019999999552965164))
		# Small-uy branch: no cos/sin calls; uses xi + (uy * 2) * (pi * yi).
		tmp = fma(Float32(Float32(Float32(1.0) - ux) * maxCos), Float32(ux * zi), Float32(sqrt(Float32(Float32(1.0) + Float32(t_0 * Float32(Float32(Float32(1.0) - ux) * Float32(ux + Float32(-1.0)))))) * Float32(xi + Float32(Float32(uy * Float32(2.0)) * Float32(Float32(pi) * yi)))));
	else
		# Other branch: zi term is maxCos * (ux * zi), with no (1 - ux) factor; yi does not appear.
		tmp = fma(cos(Float32(uy * Float32(Float32(2.0) * Float32(pi)))), Float32(xi * sqrt(Float32(Float32(1.0) + Float32(t_0 * Float32(ux + Float32(-1.0)))))), Float32(maxCos * Float32(ux * zi)));
	end
	return tmp
end
\begin{array}{l}

\\
\begin{array}{l}
t_0 := \left(ux \cdot maxCos\right) \cdot \left(ux \cdot maxCos\right)\\
\mathbf{if}\;uy \leq 0.019999999552965164:\\
\;\;\;\;\mathsf{fma}\left(\left(1 - ux\right) \cdot maxCos, ux \cdot zi, \sqrt{1 + t_0 \cdot \left(\left(1 - ux\right) \cdot \left(ux + -1\right)\right)} \cdot \left(xi + \left(uy \cdot 2\right) \cdot \left(\pi \cdot yi\right)\right)\right)\\

\mathbf{else}:\\
\;\;\;\;\mathsf{fma}\left(\cos \left(uy \cdot \left(2 \cdot \pi\right)\right), xi \cdot \sqrt{1 + t_0 \cdot \left(ux + -1\right)}, maxCos \cdot \left(ux \cdot zi\right)\right)\\


\end{array}
\end{array}
Derivation
    &prev;&pcontext;&pcontext2;&ctx;
  1. Add Preprocessing

Alternative 12: 88.5% accurate, 1.4× speedup?

\[\begin{array}{l} \\ \mathsf{fma}\left(\left(1 - ux\right) \cdot maxCos, ux \cdot zi, \sqrt{1 - \left(ux \cdot maxCos\right) \cdot \left(ux \cdot maxCos\right)} \cdot \left(xi + yi \cdot \sin \left(uy \cdot \left(2 \cdot \pi\right)\right)\right)\right) \end{array} \]
; Alternative 12 (binary32).
; Result: fma((1 - ux) * maxCos, ux * zi,
;             sqrt(1 - (ux * maxCos)^2) * (xi + yi * sin(uy * (2 * PI))))
; Note: xi is not multiplied by a cosine here.
(FPCore (xi yi zi ux uy maxCos)
 :precision binary32
 (fma
  (* (- 1.0 ux) maxCos)
  (* ux zi)
  (*
   (sqrt (- 1.0 (* (* ux maxCos) (* ux maxCos))))
   (+ xi (* yi (sin (* uy (* 2.0 PI))))))))
/*
 * Alternative 12 in C (float arithmetic).
 * Returns fmaf((1 - ux) * maxCos, ux * zi,
 *              sqrt(1 - (ux * maxCos)^2) * (xi + yi * sin(uy * (2 * pi)))).
 * xi is not multiplied by a cosine here.
 */
float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
	return fmaf(((1.0f - ux) * maxCos), (ux * zi), (sqrtf((1.0f - ((ux * maxCos) * (ux * maxCos)))) * (xi + (yi * sinf((uy * (2.0f * ((float) M_PI))))))));
}
# Alternative 12 in Julia; constants and intermediates are wrapped in Float32(...).
# Returns fma((1 - ux) * maxCos, ux * zi,
#             sqrt(1 - (ux * maxCos)^2) * (xi + yi * sin(uy * (2 * pi)))).
# xi is not multiplied by a cosine here.
function code(xi, yi, zi, ux, uy, maxCos)
	return fma(Float32(Float32(Float32(1.0) - ux) * maxCos), Float32(ux * zi), Float32(sqrt(Float32(Float32(1.0) - Float32(Float32(ux * maxCos) * Float32(ux * maxCos)))) * Float32(xi + Float32(yi * sin(Float32(uy * Float32(Float32(2.0) * Float32(pi))))))))
end
\begin{array}{l}

\\
\mathsf{fma}\left(\left(1 - ux\right) \cdot maxCos, ux \cdot zi, \sqrt{1 - \left(ux \cdot maxCos\right) \cdot \left(ux \cdot maxCos\right)} \cdot \left(xi + yi \cdot \sin \left(uy \cdot \left(2 \cdot \pi\right)\right)\right)\right)
\end{array}
Derivation
    &prev;&pcontext;&pcontext2;&ctx;
  1. Add Preprocessing

Alternative 13: 81.9% accurate, 2.0× speedup?

\[\begin{array}{l} \\ \mathsf{fma}\left(\left(1 - ux\right) \cdot maxCos, ux \cdot zi, \sqrt{1 + \left(\left(ux \cdot maxCos\right) \cdot \left(ux \cdot maxCos\right)\right) \cdot \left(\left(1 - ux\right) \cdot \left(ux + -1\right)\right)} \cdot \left(xi + \left(uy \cdot 2\right) \cdot \left(\pi \cdot yi\right)\right)\right) \end{array} \]
; Alternative 13 (binary32): no trigonometric calls.
; Result: fma((1 - ux) * maxCos, ux * zi,
;             sqrt(1 + ((ux * maxCos) * (ux * maxCos)) * ((1 - ux) * (ux + -1)))
;               * (xi + (uy * 2) * (PI * yi)))
(FPCore (xi yi zi ux uy maxCos)
 :precision binary32
 (fma
  (* (- 1.0 ux) maxCos)
  (* ux zi)
  (*
   (sqrt
    (+ 1.0 (* (* (* ux maxCos) (* ux maxCos)) (* (- 1.0 ux) (+ ux -1.0)))))
   (+ xi (* (* uy 2.0) (* PI yi))))))
/*
 * Alternative 13 in C (float arithmetic): no cosf/sinf calls.
 * Returns fmaf((1 - ux) * maxCos, ux * zi,
 *              sqrt(1 + (ux * maxCos)^2 * ((1 - ux) * (ux - 1))) * (xi + (uy * 2) * (pi * yi))).
 */
float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
	return fmaf(((1.0f - ux) * maxCos), (ux * zi), (sqrtf((1.0f + (((ux * maxCos) * (ux * maxCos)) * ((1.0f - ux) * (ux + -1.0f))))) * (xi + ((uy * 2.0f) * (((float) M_PI) * yi)))));
}
# Alternative 13 in Julia; constants and intermediates are wrapped in Float32(...).
# No cos/sin calls.
# Returns fma((1 - ux) * maxCos, ux * zi,
#             sqrt(1 + (ux * maxCos)^2 * ((1 - ux) * (ux - 1))) * (xi + (uy * 2) * (pi * yi))).
function code(xi, yi, zi, ux, uy, maxCos)
	return fma(Float32(Float32(Float32(1.0) - ux) * maxCos), Float32(ux * zi), Float32(sqrt(Float32(Float32(1.0) + Float32(Float32(Float32(ux * maxCos) * Float32(ux * maxCos)) * Float32(Float32(Float32(1.0) - ux) * Float32(ux + Float32(-1.0)))))) * Float32(xi + Float32(Float32(uy * Float32(2.0)) * Float32(Float32(pi) * yi)))))
end
\begin{array}{l}

\\
\mathsf{fma}\left(\left(1 - ux\right) \cdot maxCos, ux \cdot zi, \sqrt{1 + \left(\left(ux \cdot maxCos\right) \cdot \left(ux \cdot maxCos\right)\right) \cdot \left(\left(1 - ux\right) \cdot \left(ux + -1\right)\right)} \cdot \left(xi + \left(uy \cdot 2\right) \cdot \left(\pi \cdot yi\right)\right)\right)
\end{array}
Derivation
    &prev;&pcontext;&pcontext2;&ctx;
  1. Add Preprocessing

Alternative 14: 49.9% accurate, 2.1× speedup?

\[\begin{array}{l} \\ \mathsf{fma}\left(1, xi \cdot \sqrt{1 + \left(\left(ux \cdot maxCos\right) \cdot \left(ux \cdot maxCos\right)\right) \cdot \left(ux + -1\right)}, maxCos \cdot \left(ux \cdot zi\right)\right) \end{array} \]
; Alternative 14 (binary32).
; Result: fma(1, xi * sqrt(1 + ((ux * maxCos) * (ux * maxCos)) * (ux + -1)), maxCos * (ux * zi))
; Note: yi and uy are accepted as arguments but do not appear in the body.
(FPCore (xi yi zi ux uy maxCos)
 :precision binary32
 (fma
  1.0
  (* xi (sqrt (+ 1.0 (* (* (* ux maxCos) (* ux maxCos)) (+ ux -1.0)))))
  (* maxCos (* ux zi))))
/*
 * Alternative 14 in C (float arithmetic).
 * Returns fmaf(1, xi * sqrt(1 + (ux * maxCos)^2 * (ux - 1)), maxCos * (ux * zi)).
 * The yi and uy parameters are not used in the body.
 */
float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
	return fmaf(1.0f, (xi * sqrtf((1.0f + (((ux * maxCos) * (ux * maxCos)) * (ux + -1.0f))))), (maxCos * (ux * zi)));
}
# Alternative 14 in Julia; constants and intermediates are wrapped in Float32(...).
# Returns fma(1, xi * sqrt(1 + (ux * maxCos)^2 * (ux - 1)), maxCos * (ux * zi)).
# The yi and uy arguments are not used in the body.
function code(xi, yi, zi, ux, uy, maxCos)
	return fma(Float32(1.0), Float32(xi * sqrt(Float32(Float32(1.0) + Float32(Float32(Float32(ux * maxCos) * Float32(ux * maxCos)) * Float32(ux + Float32(-1.0)))))), Float32(maxCos * Float32(ux * zi)))
end
\begin{array}{l}

\\
\mathsf{fma}\left(1, xi \cdot \sqrt{1 + \left(\left(ux \cdot maxCos\right) \cdot \left(ux \cdot maxCos\right)\right) \cdot \left(ux + -1\right)}, maxCos \cdot \left(ux \cdot zi\right)\right)
\end{array}
Derivation
    &prev;&pcontext;&pcontext2;&ctx;
  1. Add Preprocessing

Reproduce

?
herbie shell --seed 2024006 
; Reproduction input: the benchmark with its name, precision and precondition.
; :pre restricts xi, yi, zi to [-10000, 10000], ux and uy to
; [2.328306437e-10, 1], and maxCos to [0, 1].
; The body is the initial expression with (* (* (- 1.0 ux) maxCos) ux) and
; (* (* uy 2.0) PI) written out in full rather than bound with let*.
(FPCore (xi yi zi ux uy maxCos)
  :name "UniformSampleCone 2"
  :precision binary32
  :pre (and (and (and (and (and (and (<= -10000.0 xi) (<= xi 10000.0)) (and (<= -10000.0 yi) (<= yi 10000.0))) (and (<= -10000.0 zi) (<= zi 10000.0))) (and (<= 2.328306437e-10 ux) (<= ux 1.0))) (and (<= 2.328306437e-10 uy) (<= uy 1.0))) (and (<= 0.0 maxCos) (<= maxCos 1.0)))
  (+ (+ (* (* (cos (* (* uy 2.0) PI)) (sqrt (- 1.0 (* (* (* (- 1.0 ux) maxCos) ux) (* (* (- 1.0 ux) maxCos) ux))))) xi) (* (* (sin (* (* uy 2.0) PI)) (sqrt (- 1.0 (* (* (* (- 1.0 ux) maxCos) ux) (* (* (- 1.0 ux) maxCos) ux))))) yi)) (* (* (* (- 1.0 ux) maxCos) ux) zi)))