
; Input program: evaluates (cos(t_2)*t_1*xi + sin(t_2)*t_1*yi) + t_0*zi,
; with every operation carried out in binary32.
(FPCore (xi yi zi ux uy maxCos)
:precision binary32
; t_0 = ((1 - ux) * maxCos) * ux; it is also the coefficient of zi below.
(let* ((t_0 (* (* (- 1.0 ux) maxCos) ux))
; t_1 = sqrt(1 - t_0^2); the common factor of the xi and yi terms.
(t_1 (sqrt (- 1.0 (* t_0 t_0))))
; t_2 = (uy * 2) * PI; the argument passed to both cos and sin.
(t_2 (* (* uy 2.0) PI)))
; The sum is grouped as (xi term + yi term) + zi term.
(+ (+ (* (* (cos t_2) t_1) xi) (* (* (sin t_2) t_1) yi)) (* t_0 zi))))
/*
 * Single-precision evaluation of
 *   (cos(a) * s * xi + sin(a) * s * yi) + w * zi
 * where w = ((1 - ux) * maxCos) * ux, s = sqrt(1 - w * w) and
 * a = (uy * 2) * pi.  The grouping of the operations is kept exactly as
 * written so the float rounding sequence is unchanged.
 */
float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
    const float zi_weight = ((1.0f - ux) * maxCos) * ux;
    const float shared_scale = sqrtf((1.0f - (zi_weight * zi_weight)));
    const float angle = (uy * 2.0f) * ((float) M_PI);

    /* (xi term + yi term) is formed first, then the zi term is added. */
    return (((cosf(angle) * shared_scale) * xi)
            + ((sinf(angle) * shared_scale) * yi))
           + (zi_weight * zi);
}
# Float32 evaluation of (cos(t_2)*t_1*xi + sin(t_2)*t_1*yi) + t_0*zi.
# Every intermediate product and sum is wrapped in Float32(...) so that each
# operation rounds to single precision, matching the binary32 program.
# One statement per line: Julia does not accept several statements on a
# single line unless they are separated by `;`.
function code(xi, yi, zi, ux, uy, maxCos)
    # t_0 = ((1 - ux) * maxCos) * ux; also the coefficient of zi.
    t_0 = Float32(Float32(Float32(Float32(1.0) - ux) * maxCos) * ux)
    # t_1 = sqrt(1 - t_0^2); the factor shared by the xi and yi terms.
    t_1 = sqrt(Float32(Float32(1.0) - Float32(t_0 * t_0)))
    # t_2 = (uy * 2) * pi; the argument of cos and sin.
    t_2 = Float32(Float32(uy * Float32(2.0)) * Float32(pi))
    return Float32(Float32(Float32(Float32(cos(t_2) * t_1) * xi) + Float32(Float32(sin(t_2) * t_1) * yi)) + Float32(t_0 * zi))
end
function tmp = code(xi, yi, zi, ux, uy, maxCos)
    % Evaluates (cos(t_2)*t_1*xi + sin(t_2)*t_1*yi) + t_0*zi.
    % The literals are created with single(...); pass single inputs to keep
    % the whole computation in single precision.
    % The signature and each statement sit on their own line so the
    % definition parses as a regular function file.

    % t_0 = ((1 - ux) * maxCos) * ux; also the coefficient of zi.
    t_0 = ((single(1.0) - ux) * maxCos) * ux;
    % t_1 = sqrt(1 - t_0^2); the factor shared by the xi and yi terms.
    t_1 = sqrt((single(1.0) - (t_0 * t_0)));
    % t_2 = (uy * 2) * pi; the argument of cos and sin.
    t_2 = (uy * single(2.0)) * single(pi);
    tmp = (((cos(t_2) * t_1) * xi) + ((sin(t_2) * t_1) * yi)) + (t_0 * zi);
end
\begin{array}{l}
\\
\begin{array}{l}
t_0 := \left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\\
t_1 := \sqrt{1 - t\_0 \cdot t\_0}\\
t_2 := \left(uy \cdot 2\right) \cdot \pi\\
\left(\left(\cos t\_2 \cdot t\_1\right) \cdot xi + \left(\sin t\_2 \cdot t\_1\right) \cdot yi\right) + t\_0 \cdot zi
\end{array}
\end{array}
Sampling outcomes in binary32 precision:
Herbie found 1 alternative:
| Alternative | Accuracy | Speedup |
|---|---|---|
; Listed program: (cos(t_2)*t_1*xi + sin(t_2)*t_1*yi) + t_0*zi in binary32.
(FPCore (xi yi zi ux uy maxCos)
:precision binary32
; t_0 is ((1 - ux) * maxCos) * ux and multiplies zi in the final sum.
(let* ((t_0 (* (* (- 1.0 ux) maxCos) ux))
; t_1 is sqrt(1 - t_0 * t_0) and scales both trigonometric terms.
(t_1 (sqrt (- 1.0 (* t_0 t_0))))
; t_2 is (uy * 2) * PI, used as the argument of cos and sin.
(t_2 (* (* uy 2.0) PI)))
; The xi and yi terms are added first; the zi term is added last.
(+ (+ (* (* (cos t_2) t_1) xi) (* (* (sin t_2) t_1) yi)) (* t_0 zi))))
/*
 * Computes (cos(phi) * r * xi + sin(phi) * r * yi) + k * zi in float, with
 *   k   = ((1 - ux) * maxCos) * ux
 *   r   = sqrt(1 - k * k)
 *   phi = (uy * 2) * pi
 * Operation grouping matches the reference expression exactly.
 */
float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
    const float k = ((1.0f - ux) * maxCos) * ux;
    const float r = sqrtf((1.0f - (k * k)));
    const float phi = (uy * 2.0f) * ((float) M_PI);

    /* Left-to-right: xi term plus yi term, then plus the zi term. */
    return (((cosf(phi) * r) * xi)
            + ((sinf(phi) * r) * yi))
           + (k * zi);
}
# Computes (cos(t_2)*t_1*xi + sin(t_2)*t_1*yi) + t_0*zi with each
# intermediate result rounded through Float32(...).
# Statements are placed on separate lines; Julia requires a newline or `;`
# between consecutive statements.
function code(xi, yi, zi, ux, uy, maxCos)
    # Coefficient of zi: ((1 - ux) * maxCos) * ux.
    t_0 = Float32(Float32(Float32(Float32(1.0) - ux) * maxCos) * ux)
    # Common factor of the xi and yi terms: sqrt(1 - t_0^2).
    t_1 = sqrt(Float32(Float32(1.0) - Float32(t_0 * t_0)))
    # Argument of cos and sin: (uy * 2) * pi.
    t_2 = Float32(Float32(uy * Float32(2.0)) * Float32(pi))
    return Float32(Float32(Float32(Float32(cos(t_2) * t_1) * xi) + Float32(Float32(sin(t_2) * t_1) * yi)) + Float32(t_0 * zi))
end
function tmp = code(xi, yi, zi, ux, uy, maxCos)
    % Returns (cos(t_2)*t_1*xi + sin(t_2)*t_1*yi) + t_0*zi.
    % Constants are built with single(...); supply single inputs so the
    % arithmetic stays in single precision.
    % Each statement is on its own line, with the signature terminated by a
    % newline, so the file parses as a standard function definition.

    % Coefficient of zi: ((1 - ux) * maxCos) * ux.
    t_0 = ((single(1.0) - ux) * maxCos) * ux;
    % Common factor of the xi and yi terms: sqrt(1 - t_0^2).
    t_1 = sqrt((single(1.0) - (t_0 * t_0)));
    % Argument of cos and sin: (uy * 2) * pi.
    t_2 = (uy * single(2.0)) * single(pi);
    tmp = (((cos(t_2) * t_1) * xi) + ((sin(t_2) * t_1) * yi)) + (t_0 * zi);
end
\begin{array}{l}
\\
\begin{array}{l}
t_0 := \left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\\
t_1 := \sqrt{1 - t\_0 \cdot t\_0}\\
t_2 := \left(uy \cdot 2\right) \cdot \pi\\
\left(\left(\cos t\_2 \cdot t\_1\right) \cdot xi + \left(\sin t\_2 \cdot t\_1\right) \cdot yi\right) + t\_0 \cdot zi
\end{array}
\end{array}
; Rewritten program: fma(A, xi, fma(B, C * yi, t_0 * zi)) in binary32, where
;   A = sin(fma(-PI, uy * 2, PI / 2)) * sin(acos(t_0))
;   B = sin((uy * 2) * PI)
;   C = sqrt of the expanded radicand below
(FPCore (xi yi zi ux uy maxCos)
:precision binary32
; t_0 = ((1 - ux) * maxCos) * ux; used inside acos and as the zi coefficient.
(let* ((t_0 (* (* (- 1.0 ux) maxCos) ux)))
(fma
; sin(PI/2 - 2*PI*uy) is the shifted-sine form of cos(2*PI*uy);
; sin(acos(t_0)) is the trigonometric form of sqrt(1 - t_0^2).
(* (sin (fma (- PI) (* uy 2.0) (/ PI 2.0))) (sin (acos t_0)))
xi
(fma
(sin (* (* uy 2.0) PI))
(*
; Radicand: ux^4 * (ux^-4 - (-2*maxCos^2/ux + maxCos^2 + (maxCos/ux)^2)),
; which expands algebraically to 1 - t_0^2.
(sqrt
(*
(-
(pow ux -4.0)
(fma
(/ (* maxCos maxCos) ux)
-2.0
(fma maxCos maxCos (* (/ maxCos ux) (/ maxCos ux)))))
(pow ux 4.0)))
yi)
; Addend of the inner fma: the zi term.
(* t_0 zi)))))
/*
 * Fused-multiply-add form of the expression:
 *   fmaf(xi_weight, xi, fmaf(sin((uy * 2) * pi), yi_term, zi_weight * zi))
 * The named locals below are the exact sub-expressions of the original
 * single-line return, evaluated with the same float operations.
 */
float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
    const float pi_f = (float) M_PI;
    const float zi_weight = ((1.0f - ux) * maxCos) * ux;
    const float twice_uy = uy * 2.0f;
    const float ratio = maxCos / ux;

    /* sin(pi/2 - 2*pi*uy) times sin(acos(zi_weight)). */
    const float xi_weight =
        sinf(fmaf(-pi_f, twice_uy, pi_f / 2.0f)) * sinf(acosf(zi_weight));

    /* (ux^-4 - (-2*maxCos^2/ux + maxCos^2 + (maxCos/ux)^2)) * ux^4 */
    const float subtrahend =
        fmaf((maxCos * maxCos) / ux, -2.0f, fmaf(maxCos, maxCos, ratio * ratio));
    const float radicand = (powf(ux, -4.0f) - subtrahend) * powf(ux, 4.0f);
    const float yi_term = sqrtf(radicand) * yi;

    return fmaf(xi_weight, xi,
                fmaf(sinf(twice_uy * pi_f), yi_term, zi_weight * zi));
}
# fma-based Float32 evaluation:
#   fma(xi_weight, xi, fma(sin((uy * 2) * pi), yi_term, t_0 * zi))
# Each named local is a sub-expression of the original return value with its
# Float32(...) rounding wrappers kept in place.
# Statements are on separate lines; Julia requires a newline or `;` between
# consecutive statements.
function code(xi, yi, zi, ux, uy, maxCos)
    # t_0 = ((1 - ux) * maxCos) * ux; used inside acos and as the zi coefficient.
    t_0 = Float32(Float32(Float32(Float32(1.0) - ux) * maxCos) * ux)
    # sin(pi/2 - 2*pi*uy) * sin(acos(t_0))
    xi_weight = Float32(sin(fma(Float32(-Float32(pi)), Float32(uy * Float32(2.0)), Float32(Float32(pi) / Float32(2.0)))) * sin(acos(t_0)))
    # (ux^-4 - (-2*maxCos^2/ux + maxCos^2 + (maxCos/ux)^2)) * ux^4
    radicand = Float32(Float32((ux ^ Float32(-4.0)) - fma(Float32(Float32(maxCos * maxCos) / ux), Float32(-2.0), fma(maxCos, maxCos, Float32(Float32(maxCos / ux) * Float32(maxCos / ux))))) * (ux ^ Float32(4.0)))
    yi_term = Float32(sqrt(radicand) * yi)
    return fma(xi_weight, xi, fma(sin(Float32(Float32(uy * Float32(2.0)) * Float32(pi))), yi_term, Float32(t_0 * zi)))
end
\begin{array}{l}
\\
\begin{array}{l}
t_0 := \left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\\
\mathsf{fma}\left(\sin \left(\mathsf{fma}\left(-\pi, uy \cdot 2, \frac{\pi}{2}\right)\right) \cdot \sin \cos^{-1} t\_0, xi, \mathsf{fma}\left(\sin \left(\left(uy \cdot 2\right) \cdot \pi\right), \sqrt{\left({ux}^{-4} - \mathsf{fma}\left(\frac{maxCos \cdot maxCos}{ux}, -2, \mathsf{fma}\left(maxCos, maxCos, \frac{maxCos}{ux} \cdot \frac{maxCos}{ux}\right)\right)\right) \cdot {ux}^{4}} \cdot yi, t\_0 \cdot zi\right)\right)
\end{array}
\end{array}
Initial program 99.1%
Taylor expanded in ux around inf
*-commutative: N/A
lower-*.f32: N/A
Applied rewrites: 99.2%
Applied rewrites: 99.2%
lift-cos.f32: N/A
cos-neg-rev: N/A
sin-+PI/2-rev: N/A
lower-sin.f32: N/A
lower-+.f32: N/A
lower-neg.f32: N/A
lift-PI.f32: N/A
lift-*.f32: N/A
*-commutative: N/A
lower-*.f32: N/A
lift-PI.f32: N/A
lift-*.f32: N/A
*-commutative: N/A
lower-*.f32: N/A
lower-/.f32: N/A
lift-PI.f32: 99.2
Applied rewrites: 99.2%
lift-+.f32: N/A
lift-neg.f32: N/A
lift-PI.f32: N/A
lift-*.f32: N/A
lift-*.f32: N/A
distribute-lft-neg-in: N/A
lower-fma.f32: N/A
lower-neg.f32: N/A
lift-PI.f32: N/A
*-commutative: N/A
lift-*.f32: 99.3
Applied rewrites: 99.3%
herbie shell --seed 2025062 -o reduce:regimes -o reduce:simplify
; Benchmark input with all sub-expressions written inline:
; (cos(a)*s*xi + sin(a)*s*yi) + w*zi, where a = (uy * 2) * PI,
; w = ((1 - ux) * maxCos) * ux and s = sqrt(1 - w * w).
(FPCore (xi yi zi ux uy maxCos)
:name "UniformSampleCone 2"
:precision binary32
; Precondition: xi, yi, zi in [-10000, 10000]; ux, uy in [2.328306437e-10, 1]
; (the lower bound is approximately 2^-32); maxCos in [0, 1].
:pre (and (and (and (and (and (and (<= -10000.0 xi) (<= xi 10000.0)) (and (<= -10000.0 yi) (<= yi 10000.0))) (and (<= -10000.0 zi) (<= zi 10000.0))) (and (<= 2.328306437e-10 ux) (<= ux 1.0))) (and (<= 2.328306437e-10 uy) (<= uy 1.0))) (and (<= 0.0 maxCos) (<= maxCos 1.0)))
; The sum is grouped as (xi term + yi term) + zi term.
(+ (+ (* (* (cos (* (* uy 2.0) PI)) (sqrt (- 1.0 (* (* (* (- 1.0 ux) maxCos) ux) (* (* (- 1.0 ux) maxCos) ux))))) xi) (* (* (sin (* (* uy 2.0) PI)) (sqrt (- 1.0 (* (* (* (- 1.0 ux) maxCos) ux) (* (* (- 1.0 ux) maxCos) ux))))) yi)) (* (* (* (- 1.0 ux) maxCos) ux) zi)))