
(FPCore (ux uy maxCos) :precision binary32 (let* ((t_0 (+ (- 1.0 ux) (* ux maxCos)))) (* (cos (* (* uy 2.0) PI)) (sqrt (- 1.0 (* t_0 t_0))))))
float code(float ux, float uy, float maxCos) {
float t_0 = (1.0f - ux) + (ux * maxCos);
return cosf(((uy * 2.0f) * ((float) M_PI))) * sqrtf((1.0f - (t_0 * t_0)));
}
function code(ux, uy, maxCos) t_0 = Float32(Float32(Float32(1.0) - ux) + Float32(ux * maxCos)) return Float32(cos(Float32(Float32(uy * Float32(2.0)) * Float32(pi))) * sqrt(Float32(Float32(1.0) - Float32(t_0 * t_0)))) end
function tmp = code(ux, uy, maxCos) t_0 = (single(1.0) - ux) + (ux * maxCos); tmp = cos(((uy * single(2.0)) * single(pi))) * sqrt((single(1.0) - (t_0 * t_0))); end
\begin{array}{l}
\\
\begin{array}{l}
t_0 := \left(1 - ux\right) + ux \cdot maxCos\\
\cos \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - t\_0 \cdot t\_0}
\end{array}
\end{array}
Sampling outcomes in binary32 precision:
Herbie found 5 alternatives:
| Alternative | Accuracy | Speedup |
|---|
(FPCore (ux uy maxCos) :precision binary32 (let* ((t_0 (+ (- 1.0 ux) (* ux maxCos)))) (* (cos (* (* uy 2.0) PI)) (sqrt (- 1.0 (* t_0 t_0))))))
float code(float ux, float uy, float maxCos) {
float t_0 = (1.0f - ux) + (ux * maxCos);
return cosf(((uy * 2.0f) * ((float) M_PI))) * sqrtf((1.0f - (t_0 * t_0)));
}
function code(ux, uy, maxCos) t_0 = Float32(Float32(Float32(1.0) - ux) + Float32(ux * maxCos)) return Float32(cos(Float32(Float32(uy * Float32(2.0)) * Float32(pi))) * sqrt(Float32(Float32(1.0) - Float32(t_0 * t_0)))) end
function tmp = code(ux, uy, maxCos) t_0 = (single(1.0) - ux) + (ux * maxCos); tmp = cos(((uy * single(2.0)) * single(pi))) * sqrt((single(1.0) - (t_0 * t_0))); end
\begin{array}{l}
\\
\begin{array}{l}
t_0 := \left(1 - ux\right) + ux \cdot maxCos\\
\cos \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - t\_0 \cdot t\_0}
\end{array}
\end{array}
(FPCore (ux uy maxCos) :precision binary32 (* (cos (* (* uy 2.0) PI)) (sqrt (* (- (fma (* (pow (- maxCos 1.0) 2.0) ux) -1.0 2.0) (* maxCos 2.0)) ux))))
float code(float ux, float uy, float maxCos) {
return cosf(((uy * 2.0f) * ((float) M_PI))) * sqrtf(((fmaf((powf((maxCos - 1.0f), 2.0f) * ux), -1.0f, 2.0f) - (maxCos * 2.0f)) * ux));
}
function code(ux, uy, maxCos) return Float32(cos(Float32(Float32(uy * Float32(2.0)) * Float32(pi))) * sqrt(Float32(Float32(fma(Float32((Float32(maxCos - Float32(1.0)) ^ Float32(2.0)) * ux), Float32(-1.0), Float32(2.0)) - Float32(maxCos * Float32(2.0))) * ux))) end
\begin{array}{l}
\\
\cos \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{\left(\mathsf{fma}\left({\left(maxCos - 1\right)}^{2} \cdot ux, -1, 2\right) - maxCos \cdot 2\right) \cdot ux}
\end{array}
Initial program 57.3%
Taylor expanded in ux around 0
*-commutativeN/A
lower-*.f32N/A
lower--.f32N/A
+-commutativeN/A
*-commutativeN/A
lower-fma.f32N/A
*-commutativeN/A
lower-*.f32N/A
lower-pow.f32N/A
lower--.f32N/A
*-commutativeN/A
lower-*.f3299.1
Applied rewrites99.1%
(FPCore (ux uy maxCos)
:precision binary32
(*
(cos (* (* uy 2.0) PI))
(sqrt
(*
(* ux ux)
(-
(fma -1.0 (pow (- 1.0 maxCos) 2.0) (* 2.0 (/ 1.0 ux)))
(* 2.0 (/ maxCos ux)))))))
float code(float ux, float uy, float maxCos) {
return cosf(((uy * 2.0f) * ((float) M_PI))) * sqrtf(((ux * ux) * (fmaf(-1.0f, powf((1.0f - maxCos), 2.0f), (2.0f * (1.0f / ux))) - (2.0f * (maxCos / ux)))));
}
function code(ux, uy, maxCos) return Float32(cos(Float32(Float32(uy * Float32(2.0)) * Float32(pi))) * sqrt(Float32(Float32(ux * ux) * Float32(fma(Float32(-1.0), (Float32(Float32(1.0) - maxCos) ^ Float32(2.0)), Float32(Float32(2.0) * Float32(Float32(1.0) / ux))) - Float32(Float32(2.0) * Float32(maxCos / ux)))))) end
\begin{array}{l}
\\
\cos \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{\left(ux \cdot ux\right) \cdot \left(\mathsf{fma}\left(-1, {\left(1 - maxCos\right)}^{2}, 2 \cdot \frac{1}{ux}\right) - 2 \cdot \frac{maxCos}{ux}\right)}
\end{array}
Initial program 57.3%
Taylor expanded in ux around -inf
*-commutativeN/A
lower-*.f32N/A
Applied rewrites98.9%
Taylor expanded in maxCos around inf
lower-*.f32N/A
lower--.f32N/A
lower-*.f32N/A
lower-/.f32N/A
lower-*.f32N/A
lower-*.f32N/A
lift-/.f3288.5
Applied rewrites88.5%
Taylor expanded in ux around inf
lower-*.f32N/A
pow2N/A
lift-*.f32N/A
lower--.f32N/A
lower-fma.f32N/A
lower-pow.f32N/A
lower-+.f32N/A
lower-*.f32N/A
lift-/.f32N/A
lift-*.f32N/A
lower-*.f32N/A
lower-/.f3299.0
Applied rewrites99.0%
Final simplification99.0%
(FPCore (ux uy maxCos)
:precision binary32
(let* ((t_0 (- 2.0 (* maxCos 2.0))) (t_1 (pow t_0 2.5)) (t_2 (pow t_0 1.5)))
(*
(cos (* (* uy 2.0) PI))
(fma
(fma
(fma
(* -0.125 (sqrt (/ (/ 1.0 (pow ux 3.0)) (* t_2 t_2))))
(pow (- maxCos 1.0) 4.0)
(*
(* -0.0625 (sqrt (/ (/ 1.0 ux) (* t_1 t_1))))
(pow (- maxCos 1.0) 6.0)))
(* ux ux)
(* (* -0.5 (sqrt (/ (/ 1.0 ux) t_0))) (pow (- maxCos 1.0) 2.0)))
(* ux ux)
(sqrt (* ux t_0))))))
float code(float ux, float uy, float maxCos) {
float t_0 = 2.0f - (maxCos * 2.0f);
float t_1 = powf(t_0, 2.5f);
float t_2 = powf(t_0, 1.5f);
return cosf(((uy * 2.0f) * ((float) M_PI))) * fmaf(fmaf(fmaf((-0.125f * sqrtf(((1.0f / powf(ux, 3.0f)) / (t_2 * t_2)))), powf((maxCos - 1.0f), 4.0f), ((-0.0625f * sqrtf(((1.0f / ux) / (t_1 * t_1)))) * powf((maxCos - 1.0f), 6.0f))), (ux * ux), ((-0.5f * sqrtf(((1.0f / ux) / t_0))) * powf((maxCos - 1.0f), 2.0f))), (ux * ux), sqrtf((ux * t_0)));
}
function code(ux, uy, maxCos) t_0 = Float32(Float32(2.0) - Float32(maxCos * Float32(2.0))) t_1 = t_0 ^ Float32(2.5) t_2 = t_0 ^ Float32(1.5) return Float32(cos(Float32(Float32(uy * Float32(2.0)) * Float32(pi))) * fma(fma(fma(Float32(Float32(-0.125) * sqrt(Float32(Float32(Float32(1.0) / (ux ^ Float32(3.0))) / Float32(t_2 * t_2)))), (Float32(maxCos - Float32(1.0)) ^ Float32(4.0)), Float32(Float32(Float32(-0.0625) * sqrt(Float32(Float32(Float32(1.0) / ux) / Float32(t_1 * t_1)))) * (Float32(maxCos - Float32(1.0)) ^ Float32(6.0)))), Float32(ux * ux), Float32(Float32(Float32(-0.5) * sqrt(Float32(Float32(Float32(1.0) / ux) / t_0))) * (Float32(maxCos - Float32(1.0)) ^ Float32(2.0)))), Float32(ux * ux), sqrt(Float32(ux * t_0)))) end
\begin{array}{l}
\\
\begin{array}{l}
t_0 := 2 - maxCos \cdot 2\\
t_1 := {t\_0}^{2.5}\\
t_2 := {t\_0}^{1.5}\\
\cos \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \mathsf{fma}\left(\mathsf{fma}\left(\mathsf{fma}\left(-0.125 \cdot \sqrt{\frac{\frac{1}{{ux}^{3}}}{t\_2 \cdot t\_2}}, {\left(maxCos - 1\right)}^{4}, \left(-0.0625 \cdot \sqrt{\frac{\frac{1}{ux}}{t\_1 \cdot t\_1}}\right) \cdot {\left(maxCos - 1\right)}^{6}\right), ux \cdot ux, \left(-0.5 \cdot \sqrt{\frac{\frac{1}{ux}}{t\_0}}\right) \cdot {\left(maxCos - 1\right)}^{2}\right), ux \cdot ux, \sqrt{ux \cdot t\_0}\right)
\end{array}
\end{array}
Initial program 57.3%
Taylor expanded in ux around 0
Applied rewrites97.0%
lift-*.f32N/A
lift-sqrt.f32N/A
lift-sqrt.f32N/A
sqrt-unprodN/A
lower-sqrt.f32N/A
lower-*.f3297.4
Applied rewrites97.4%
(FPCore (ux uy maxCos)
:precision binary32
(let* ((t_0 (- 2.0 (* 2.0 maxCos)))
(t_1 (* ux t_0))
(t_2 (pow t_1 0.5))
(t_3 (sin (fma 2.0 (* uy PI) (/ PI 2.0)))))
(fma
t_2
(sin (* uy (fma 0.5 (/ PI uy) (* 2.0 PI))))
(*
(* ux ux)
(+
(* -0.5 (* (/ 1.0 t_2) (* t_3 (pow (- maxCos 1.0) 2.0))))
(*
(* ux ux)
(fma
-0.125
(* (pow (/ 1.0 (pow t_1 3.0)) 0.5) (* t_3 (pow (- maxCos 1.0) 4.0)))
(*
-0.0625
(*
(pow (/ 1.0 (* ux (pow t_0 5.0))) 0.5)
(* t_3 (pow (- maxCos 1.0) 6.0)))))))))))
float code(float ux, float uy, float maxCos) {
float t_0 = 2.0f - (2.0f * maxCos);
float t_1 = ux * t_0;
float t_2 = powf(t_1, 0.5f);
float t_3 = sinf(fmaf(2.0f, (uy * ((float) M_PI)), (((float) M_PI) / 2.0f)));
return fmaf(t_2, sinf((uy * fmaf(0.5f, (((float) M_PI) / uy), (2.0f * ((float) M_PI))))), ((ux * ux) * ((-0.5f * ((1.0f / t_2) * (t_3 * powf((maxCos - 1.0f), 2.0f)))) + ((ux * ux) * fmaf(-0.125f, (powf((1.0f / powf(t_1, 3.0f)), 0.5f) * (t_3 * powf((maxCos - 1.0f), 4.0f))), (-0.0625f * (powf((1.0f / (ux * powf(t_0, 5.0f))), 0.5f) * (t_3 * powf((maxCos - 1.0f), 6.0f)))))))));
}
function code(ux, uy, maxCos) t_0 = Float32(Float32(2.0) - Float32(Float32(2.0) * maxCos)) t_1 = Float32(ux * t_0) t_2 = t_1 ^ Float32(0.5) t_3 = sin(fma(Float32(2.0), Float32(uy * Float32(pi)), Float32(Float32(pi) / Float32(2.0)))) return fma(t_2, sin(Float32(uy * fma(Float32(0.5), Float32(Float32(pi) / uy), Float32(Float32(2.0) * Float32(pi))))), Float32(Float32(ux * ux) * Float32(Float32(Float32(-0.5) * Float32(Float32(Float32(1.0) / t_2) * Float32(t_3 * (Float32(maxCos - Float32(1.0)) ^ Float32(2.0))))) + Float32(Float32(ux * ux) * fma(Float32(-0.125), Float32((Float32(Float32(1.0) / (t_1 ^ Float32(3.0))) ^ Float32(0.5)) * Float32(t_3 * (Float32(maxCos - Float32(1.0)) ^ Float32(4.0)))), Float32(Float32(-0.0625) * Float32((Float32(Float32(1.0) / Float32(ux * (t_0 ^ Float32(5.0)))) ^ Float32(0.5)) * Float32(t_3 * (Float32(maxCos - Float32(1.0)) ^ Float32(6.0)))))))))) end
\begin{array}{l}
\\
\begin{array}{l}
t_0 := 2 - 2 \cdot maxCos\\
t_1 := ux \cdot t\_0\\
t_2 := {t\_1}^{0.5}\\
t_3 := \sin \left(\mathsf{fma}\left(2, uy \cdot \pi, \frac{\pi}{2}\right)\right)\\
\mathsf{fma}\left(t\_2, \sin \left(uy \cdot \mathsf{fma}\left(0.5, \frac{\pi}{uy}, 2 \cdot \pi\right)\right), \left(ux \cdot ux\right) \cdot \left(-0.5 \cdot \left(\frac{1}{t\_2} \cdot \left(t\_3 \cdot {\left(maxCos - 1\right)}^{2}\right)\right) + \left(ux \cdot ux\right) \cdot \mathsf{fma}\left(-0.125, {\left(\frac{1}{{t\_1}^{3}}\right)}^{0.5} \cdot \left(t\_3 \cdot {\left(maxCos - 1\right)}^{4}\right), -0.0625 \cdot \left({\left(\frac{1}{ux \cdot {t\_0}^{5}}\right)}^{0.5} \cdot \left(t\_3 \cdot {\left(maxCos - 1\right)}^{6}\right)\right)\right)\right)\right)
\end{array}
\end{array}
Initial program 57.3%
Taylor expanded in ux around 0
*-commutativeN/A
lower-*.f32N/A
lower--.f32N/A
+-commutativeN/A
*-commutativeN/A
lower-fma.f32N/A
*-commutativeN/A
lower-*.f32N/A
lower-pow.f32N/A
lower--.f32N/A
*-commutativeN/A
lower-*.f3299.1
Applied rewrites99.1%
Taylor expanded in ux around 0
Applied rewrites97.2%
Taylor expanded in uy around inf
lower-*.f32N/A
lower-fma.f32N/A
lower-/.f32N/A
lift-PI.f32N/A
lower-*.f32N/A
lift-PI.f3297.2
Applied rewrites97.2%
(FPCore (ux uy maxCos) :precision binary32 (* (sin (fma (* PI uy) 2.0 (/ PI 2.0))) (* (sqrt ux) (sqrt (- 2.0 (* maxCos 2.0))))))
float code(float ux, float uy, float maxCos) {
return sinf(fmaf((((float) M_PI) * uy), 2.0f, (((float) M_PI) / 2.0f))) * (sqrtf(ux) * sqrtf((2.0f - (maxCos * 2.0f))));
}
function code(ux, uy, maxCos) return Float32(sin(fma(Float32(Float32(pi) * uy), Float32(2.0), Float32(Float32(pi) / Float32(2.0)))) * Float32(sqrt(ux) * sqrt(Float32(Float32(2.0) - Float32(maxCos * Float32(2.0)))))) end
\begin{array}{l}
\\
\sin \left(\mathsf{fma}\left(\pi \cdot uy, 2, \frac{\pi}{2}\right)\right) \cdot \left(\sqrt{ux} \cdot \sqrt{2 - maxCos \cdot 2}\right)
\end{array}
Initial program 57.3%
Taylor expanded in ux around 0
*-commutativeN/A
lower-*.f32N/A
Applied rewrites78.4%
herbie shell --seed 2025065
(FPCore (ux uy maxCos)
:name "UniformSampleCone, x"
:precision binary32
:pre (and (and (and (<= 2.328306437e-10 ux) (<= ux 1.0)) (and (<= 2.328306437e-10 uy) (<= uy 1.0))) (and (<= 0.0 maxCos) (<= maxCos 1.0)))
(* (cos (* (* uy 2.0) PI)) (sqrt (- 1.0 (* (+ (- 1.0 ux) (* ux maxCos)) (+ (- 1.0 ux) (* ux maxCos)))))))