
(FPCore (ux uy maxCos) :precision binary32 (let* ((t_0 (+ (- 1.0 ux) (* ux maxCos)))) (* (sin (* (* uy 2.0) PI)) (sqrt (- 1.0 (* t_0 t_0))))))
/*
 * Original program: sin(2*pi*uy) * sqrt(1 - t*t), where
 * t = (1 - ux) + ux * maxCos. All arithmetic is single precision.
 */
float code(float ux, float uy, float maxCos) {
	const float blend = (1.0f - ux) + (ux * maxCos);
	const float angle = (uy * 2.0f) * ((float) M_PI);
	const float sine = sinf(angle);
	const float root = sqrtf((1.0f - (blend * blend)));
	return sine * root;
}
# Original program: sin(2*pi*uy) * sqrt(1 - t_0^2) with
# t_0 = (1 - ux) + ux * maxCos; every intermediate is rounded to Float32.
# Statements are placed on separate lines so the definition parses.
function code(ux, uy, maxCos)
    t_0 = Float32(Float32(Float32(1.0) - ux) + Float32(ux * maxCos))
    return Float32(sin(Float32(Float32(uy * Float32(2.0)) * Float32(pi))) * sqrt(Float32(Float32(1.0) - Float32(t_0 * t_0))))
end
function tmp = code(ux, uy, maxCos)
	% Original program: sin(2*pi*uy) * sqrt(1 - t_0^2) with
	% t_0 = (1 - ux) + ux * maxCos, using single-precision constants.
	% The signature and each statement sit on their own lines.
	t_0 = (single(1.0) - ux) + (ux * maxCos);
	tmp = sin(((uy * single(2.0)) * single(pi))) * sqrt((single(1.0) - (t_0 * t_0)));
end
\begin{array}{l}
\\
\begin{array}{l}
t_0 := \left(1 - ux\right) + ux \cdot maxCos\\
\sin \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - t\_0 \cdot t\_0}
\end{array}
\end{array}
Sampling outcomes in binary32 precision:
Herbie found 12 alternatives:
| Alternative | Accuracy | Speedup |
|---|---|---|
(FPCore (ux uy maxCos) :precision binary32 (let* ((t_0 (+ (- 1.0 ux) (* ux maxCos)))) (* (sin (* (* uy 2.0) PI)) (sqrt (- 1.0 (* t_0 t_0))))))
/*
 * Evaluates sin(2*pi*uy) * sqrt(1 - t*t) in single precision,
 * with t = (1 - ux) + ux * maxCos.
 */
float code(float ux, float uy, float maxCos) {
	const float mix = (1.0f - ux) + (ux * maxCos);
	const float phase = (uy * 2.0f) * ((float) M_PI);
	const float radial = sqrtf((1.0f - (mix * mix)));
	return sinf(phase) * radial;
}
# Evaluates sin(2*pi*uy) * sqrt(1 - t_0^2), t_0 = (1 - ux) + ux * maxCos,
# rounding each intermediate to Float32.
# One statement per line so the function body parses.
function code(ux, uy, maxCos)
    t_0 = Float32(Float32(Float32(1.0) - ux) + Float32(ux * maxCos))
    return Float32(sin(Float32(Float32(uy * Float32(2.0)) * Float32(pi))) * sqrt(Float32(Float32(1.0) - Float32(t_0 * t_0))))
end
function tmp = code(ux, uy, maxCos)
	% Evaluates sin(2*pi*uy) * sqrt(1 - t_0^2), t_0 = (1 - ux) + ux * maxCos,
	% with single-precision constants. Signature and statements are on
	% separate lines.
	t_0 = (single(1.0) - ux) + (ux * maxCos);
	tmp = sin(((uy * single(2.0)) * single(pi))) * sqrt((single(1.0) - (t_0 * t_0)));
end
\begin{array}{l}
\\
\begin{array}{l}
t_0 := \left(1 - ux\right) + ux \cdot maxCos\\
\sin \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{1 - t\_0 \cdot t\_0}
\end{array}
\end{array}
(FPCore (ux uy maxCos) :precision binary32 (* (sin (* (* uy 2.0) PI)) (sqrt (* (- (fma (- ux) (pow (- maxCos 1.0) 2.0) 2.0) (* maxCos 2.0)) ux))))
/*
 * Alternative form: sin(2*pi*uy) * sqrt((fma(-ux, (maxCos-1)^2, 2) - 2*maxCos) * ux).
 * The radicand is written as a product with ux instead of 1 - t*t.
 */
float code(float ux, float uy, float maxCos) {
	const float gap_sq = powf((maxCos - 1.0f), 2.0f);
	const float radicand = (fmaf(-ux, gap_sq, 2.0f) - (maxCos * 2.0f)) * ux;
	const float angle = (uy * 2.0f) * ((float) M_PI);
	return sinf(angle) * sqrtf(radicand);
}
# Alternative form: sin(2*pi*uy) * sqrt((fma(-ux, (maxCos-1)^2, 2) - 2*maxCos) * ux),
# with every intermediate rounded to Float32.
function code(ux, uy, maxCos)
    angle = Float32(Float32(uy * Float32(2.0)) * Float32(pi))
    gap_sq = Float32(maxCos - Float32(1.0)) ^ Float32(2.0)
    radicand = Float32(Float32(fma(Float32(-ux), gap_sq, Float32(2.0)) - Float32(maxCos * Float32(2.0))) * ux)
    return Float32(sin(angle) * sqrt(radicand))
end
\begin{array}{l}
\\
\sin \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{\left(\mathsf{fma}\left(-ux, {\left(maxCos - 1\right)}^{2}, 2\right) - maxCos \cdot 2\right) \cdot ux}
\end{array}
Initial program 57.8%
Taylor expanded in ux around 0
*-commutativeN/A
lower-*.f32N/A
lower--.f32N/A
+-commutativeN/A
associate-*r*N/A
mul-1-negN/A
lower-fma.f32N/A
lower-neg.f32N/A
lower-pow.f32N/A
lower--.f32N/A
*-commutativeN/A
lower-*.f32 98.4
Applied rewrites98.4%
(FPCore (ux uy maxCos)
:precision binary32
(*
(sin (* (* uy 2.0) PI))
(sqrt
(*
(- (/ (fma -2.0 maxCos 2.0) ux) (pow (fma -1.0 maxCos 1.0) 2.0))
(* ux ux)))))
/*
 * Alternative form: sin(2*pi*uy) *
 * sqrt((fma(-2, maxCos, 2) / ux - fma(-1, maxCos, 1)^2) * (ux * ux)).
 */
float code(float ux, float uy, float maxCos) {
	const float slope = fmaf(-2.0f, maxCos, 2.0f) / ux;
	const float gap_sq = powf(fmaf(-1.0f, maxCos, 1.0f), 2.0f);
	const float radicand = (slope - gap_sq) * (ux * ux);
	const float angle = (uy * 2.0f) * ((float) M_PI);
	return sinf(angle) * sqrtf(radicand);
}
# Alternative form: sin(2*pi*uy) *
# sqrt((fma(-2, maxCos, 2) / ux - fma(-1, maxCos, 1)^2) * (ux * ux)),
# with every intermediate rounded to Float32.
function code(ux, uy, maxCos)
    angle = Float32(Float32(uy * Float32(2.0)) * Float32(pi))
    slope = Float32(fma(Float32(-2.0), maxCos, Float32(2.0)) / ux)
    gap_sq = fma(Float32(-1.0), maxCos, Float32(1.0)) ^ Float32(2.0)
    radicand = Float32(Float32(slope - gap_sq) * Float32(ux * ux))
    return Float32(sin(angle) * sqrt(radicand))
end
\begin{array}{l}
\\
\sin \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{\left(\frac{\mathsf{fma}\left(-2, maxCos, 2\right)}{ux} - {\left(\mathsf{fma}\left(-1, maxCos, 1\right)\right)}^{2}\right) \cdot \left(ux \cdot ux\right)}
\end{array}
Initial program 57.8%
Taylor expanded in ux around -inf
*-commutativeN/A
lower-*.f32N/A
Applied rewrites98.4%
(FPCore (ux uy maxCos)
:precision binary32
(let* ((t_0 (pow (- maxCos 1.0) 2.0)) (t_1 (* (- ux) t_0)))
(*
(sin (* (* uy 2.0) PI))
(sqrt
(*
(-
(/ (+ (pow t_1 3.0) 8.0) (fma t_1 t_1 (+ 4.0 (* (* ux t_0) 2.0))))
(* maxCos 2.0))
ux)))))
/*
 * Alternative form in which the sum fma(-ux, (maxCos-1)^2, 2) is expressed
 * as a quotient (a^3 + 8) / (a*a + 4 + 2*ux*(maxCos-1)^2), a = -ux*(maxCos-1)^2,
 * before subtracting 2*maxCos, scaling by ux and taking the square root.
 */
float code(float ux, float uy, float maxCos) {
	const float gap_sq = powf((maxCos - 1.0f), 2.0f);
	const float neg_term = -ux * gap_sq;
	const float cubes = powf(neg_term, 3.0f) + 8.0f;
	const float quad = fmaf(neg_term, neg_term, (4.0f + ((ux * gap_sq) * 2.0f)));
	const float radicand = ((cubes / quad) - (maxCos * 2.0f)) * ux;
	const float angle = (uy * 2.0f) * ((float) M_PI);
	return sinf(angle) * sqrtf(radicand);
}
# Alternative form: the sum fma(-ux, (maxCos-1)^2, 2) is written as the quotient
# (t_1^3 + 8) / fma(t_1, t_1, 4 + 2*ux*t_0), then 2*maxCos is subtracted,
# the result is scaled by ux, square-rooted and multiplied by sin(2*pi*uy).
# Statements are placed on separate lines so the definition parses.
function code(ux, uy, maxCos)
    t_0 = Float32(maxCos - Float32(1.0)) ^ Float32(2.0)
    t_1 = Float32(Float32(-ux) * t_0)
    return Float32(sin(Float32(Float32(uy * Float32(2.0)) * Float32(pi))) * sqrt(Float32(Float32(Float32(Float32((t_1 ^ Float32(3.0)) + Float32(8.0)) / fma(t_1, t_1, Float32(Float32(4.0) + Float32(Float32(ux * t_0) * Float32(2.0))))) - Float32(maxCos * Float32(2.0))) * ux)))
end
\begin{array}{l}
\\
\begin{array}{l}
t_0 := {\left(maxCos - 1\right)}^{2}\\
t_1 := \left(-ux\right) \cdot t\_0\\
\sin \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{\left(\frac{{t\_1}^{3} + 8}{\mathsf{fma}\left(t\_1, t\_1, 4 + \left(ux \cdot t\_0\right) \cdot 2\right)} - maxCos \cdot 2\right) \cdot ux}
\end{array}
\end{array}
Initial program 57.8%
Taylor expanded in ux around 0
*-commutativeN/A
lower-*.f32N/A
lower--.f32N/A
+-commutativeN/A
associate-*r*N/A
mul-1-negN/A
lower-fma.f32N/A
lower-neg.f32N/A
lower-pow.f32N/A
lower--.f32N/A
*-commutativeN/A
lower-*.f32 98.4
Applied rewrites98.4%
lift-neg.f32N/A
lift-fma.f32N/A
lift--.f32N/A
lift-pow.f32N/A
flip3-+N/A
lower-/.f32N/A
lower-+.f32N/A
lower-pow.f32N/A
lower-*.f32N/A
lift-neg.f32N/A
lift-pow.f32N/A
lift--.f32N/A
metadata-evalN/A
Applied rewrites98.4%
Final simplification98.4%
(FPCore (ux uy maxCos)
:precision binary32
(let* ((t_0 (- 8.0 (pow ux 3.0)))
(t_1 (- 4.0 (* -2.0 ux)))
(t_2 (* ux t_1))
(t_3 (* -4.0 t_1))
(t_4
(-
(fma -8.0 t_2 (* -8.0 (pow ux 4.0)))
(* (* ux ux) (fma -4.0 ux t_3))))
(t_5 (+ (pow ux 6.0) (pow t_1 3.0)))
(t_6 (/ (* (pow ux 3.0) t_4) t_5))
(t_7 (pow t_5 2.0))
(t_8 (fma 4.0 t_2 (* 16.0 (* ux ux))))
(t_9
(-
(fma 4.0 t_2 (fma 16.0 (* ux ux) (* 28.0 (pow ux 4.0))))
(* (* ux ux) (fma 2.0 ux (fma 6.0 t_1 (* 16.0 ux))))))
(t_10 (pow t_1 2.0))
(t_11 (* (* ux ux) t_1))
(t_12 (- (+ (pow ux 4.0) t_10) t_11))
(t_13 (/ (* (pow ux 3.0) t_12) t_5))
(t_14 (fma 6.0 t_13 (/ (* t_0 t_4) t_5)))
(t_15 (* ux t_10))
(t_16
(fma 2.0 t_15 (fma 32.0 t_11 (fma 66.0 (pow ux 6.0) (* t_8 t_1)))))
(t_17 (fma -12.0 (pow ux 6.0) (fma -8.0 t_15 (* -4.0 t_15))))
(t_18 (/ (* t_0 (* t_17 t_12)) t_7))
(t_19 (- t_14 t_18))
(t_20
(-
(fma -15.0 t_13 (fma 6.0 t_6 (/ (* t_0 t_9) t_5)))
(+ (/ (* t_0 (* t_16 t_12)) t_7) (/ (* t_17 t_19) t_5)))))
(*
(sin (* (* uy 2.0) PI))
(sqrt
(fma
maxCos
(fma
maxCos
(fma
maxCos
(*
ux
(-
(fma
-15.0
t_6
(fma
6.0
(/ (* (pow ux 3.0) t_9) t_5)
(fma
20.0
t_13
(/
(*
t_0
(-
(fma -56.0 (pow ux 4.0) (* -16.0 (* ux ux)))
(* (* ux ux) (fma -24.0 ux (fma -8.0 ux t_3)))))
t_5))))
(+
(/
(*
t_0
(*
(fma -220.0 (pow ux 6.0) (fma -32.0 t_11 (* -4.0 (* ux t_8))))
t_12))
t_7)
(/ (fma t_17 t_20 (* t_16 t_19)) t_5))))
(* ux t_20))
(* ux (- t_14 (+ 2.0 t_18))))
(/ (* ux (* t_0 t_12)) t_5))))))
/*
 * Alternative form whose radicand is a cubic polynomial in maxCos,
 * evaluated as nested fmaf(maxCos, ..., ...) calls. The coefficients are
 * rational functions of ux built from the temporaries t_0..t_20 below;
 * t_5 and its square t_7 are the denominators used throughout.
 * The result is sin(2*pi*uy) times the square root of that polynomial.
 * When maxCos is 0 and the higher coefficients are finite, the radicand
 * reduces to its constant term (ux * (t_0 * t_12)) / t_5.
 */
float code(float ux, float uy, float maxCos) {
float t_0 = 8.0f - powf(ux, 3.0f);
float t_1 = 4.0f - (-2.0f * ux);
float t_2 = ux * t_1;
float t_3 = -4.0f * t_1;
float t_4 = fmaf(-8.0f, t_2, (-8.0f * powf(ux, 4.0f))) - ((ux * ux) * fmaf(-4.0f, ux, t_3));
float t_5 = powf(ux, 6.0f) + powf(t_1, 3.0f);
float t_6 = (powf(ux, 3.0f) * t_4) / t_5;
float t_7 = powf(t_5, 2.0f);
float t_8 = fmaf(4.0f, t_2, (16.0f * (ux * ux)));
float t_9 = fmaf(4.0f, t_2, fmaf(16.0f, (ux * ux), (28.0f * powf(ux, 4.0f)))) - ((ux * ux) * fmaf(2.0f, ux, fmaf(6.0f, t_1, (16.0f * ux))));
float t_10 = powf(t_1, 2.0f);
float t_11 = (ux * ux) * t_1;
float t_12 = (powf(ux, 4.0f) + t_10) - t_11;
float t_13 = (powf(ux, 3.0f) * t_12) / t_5;
float t_14 = fmaf(6.0f, t_13, ((t_0 * t_4) / t_5));
float t_15 = ux * t_10;
float t_16 = fmaf(2.0f, t_15, fmaf(32.0f, t_11, fmaf(66.0f, powf(ux, 6.0f), (t_8 * t_1))));
float t_17 = fmaf(-12.0f, powf(ux, 6.0f), fmaf(-8.0f, t_15, (-4.0f * t_15)));
float t_18 = (t_0 * (t_17 * t_12)) / t_7;
float t_19 = t_14 - t_18;
float t_20 = fmaf(-15.0f, t_13, fmaf(6.0f, t_6, ((t_0 * t_9) / t_5))) - (((t_0 * (t_16 * t_12)) / t_7) + ((t_17 * t_19) / t_5));
return sinf(((uy * 2.0f) * ((float) M_PI))) * sqrtf(fmaf(maxCos, fmaf(maxCos, fmaf(maxCos, (ux * (fmaf(-15.0f, t_6, fmaf(6.0f, ((powf(ux, 3.0f) * t_9) / t_5), fmaf(20.0f, t_13, ((t_0 * (fmaf(-56.0f, powf(ux, 4.0f), (-16.0f * (ux * ux))) - ((ux * ux) * fmaf(-24.0f, ux, fmaf(-8.0f, ux, t_3))))) / t_5)))) - (((t_0 * (fmaf(-220.0f, powf(ux, 6.0f), fmaf(-32.0f, t_11, (-4.0f * (ux * t_8)))) * t_12)) / t_7) + (fmaf(t_17, t_20, (t_16 * t_19)) / t_5)))), (ux * t_20)), (ux * (t_14 - (2.0f + t_18)))), ((ux * (t_0 * t_12)) / t_5)));
}
# Alternative form whose radicand is a cubic polynomial in maxCos, evaluated
# as nested fma(maxCos, ..., ...) calls. The coefficients are rational
# functions of ux built from t_0..t_20; t_5 and its square t_7 are the
# denominators. The result is sin(2*pi*uy) times the square root of it.
# Each assignment is on its own line so the definition parses; the
# expressions themselves are unchanged.
function code(ux, uy, maxCos)
    t_0 = Float32(Float32(8.0) - (ux ^ Float32(3.0)))
    t_1 = Float32(Float32(4.0) - Float32(Float32(-2.0) * ux))
    t_2 = Float32(ux * t_1)
    t_3 = Float32(Float32(-4.0) * t_1)
    t_4 = Float32(fma(Float32(-8.0), t_2, Float32(Float32(-8.0) * (ux ^ Float32(4.0)))) - Float32(Float32(ux * ux) * fma(Float32(-4.0), ux, t_3)))
    t_5 = Float32((ux ^ Float32(6.0)) + (t_1 ^ Float32(3.0)))
    t_6 = Float32(Float32((ux ^ Float32(3.0)) * t_4) / t_5)
    t_7 = t_5 ^ Float32(2.0)
    t_8 = fma(Float32(4.0), t_2, Float32(Float32(16.0) * Float32(ux * ux)))
    t_9 = Float32(fma(Float32(4.0), t_2, fma(Float32(16.0), Float32(ux * ux), Float32(Float32(28.0) * (ux ^ Float32(4.0))))) - Float32(Float32(ux * ux) * fma(Float32(2.0), ux, fma(Float32(6.0), t_1, Float32(Float32(16.0) * ux)))))
    t_10 = t_1 ^ Float32(2.0)
    t_11 = Float32(Float32(ux * ux) * t_1)
    t_12 = Float32(Float32((ux ^ Float32(4.0)) + t_10) - t_11)
    t_13 = Float32(Float32((ux ^ Float32(3.0)) * t_12) / t_5)
    t_14 = fma(Float32(6.0), t_13, Float32(Float32(t_0 * t_4) / t_5))
    t_15 = Float32(ux * t_10)
    t_16 = fma(Float32(2.0), t_15, fma(Float32(32.0), t_11, fma(Float32(66.0), (ux ^ Float32(6.0)), Float32(t_8 * t_1))))
    t_17 = fma(Float32(-12.0), (ux ^ Float32(6.0)), fma(Float32(-8.0), t_15, Float32(Float32(-4.0) * t_15)))
    t_18 = Float32(Float32(t_0 * Float32(t_17 * t_12)) / t_7)
    t_19 = Float32(t_14 - t_18)
    t_20 = Float32(fma(Float32(-15.0), t_13, fma(Float32(6.0), t_6, Float32(Float32(t_0 * t_9) / t_5))) - Float32(Float32(Float32(t_0 * Float32(t_16 * t_12)) / t_7) + Float32(Float32(t_17 * t_19) / t_5)))
    return Float32(sin(Float32(Float32(uy * Float32(2.0)) * Float32(pi))) * sqrt(fma(maxCos, fma(maxCos, fma(maxCos, Float32(ux * Float32(fma(Float32(-15.0), t_6, fma(Float32(6.0), Float32(Float32((ux ^ Float32(3.0)) * t_9) / t_5), fma(Float32(20.0), t_13, Float32(Float32(t_0 * Float32(fma(Float32(-56.0), (ux ^ Float32(4.0)), Float32(Float32(-16.0) * Float32(ux * ux))) - Float32(Float32(ux * ux) * fma(Float32(-24.0), ux,
        fma(Float32(-8.0), ux, t_3))))) / t_5)))) - Float32(Float32(Float32(t_0 * Float32(fma(Float32(-220.0), (ux ^ Float32(6.0)), fma(Float32(-32.0), t_11, Float32(Float32(-4.0) * Float32(ux * t_8)))) * t_12)) / t_7) + Float32(fma(t_17, t_20, Float32(t_16 * t_19)) / t_5)))), Float32(ux * t_20)), Float32(ux * Float32(t_14 - Float32(Float32(2.0) + t_18)))), Float32(Float32(ux * Float32(t_0 * t_12)) / t_5))))
end
\begin{array}{l}
\\
\begin{array}{l}
t_0 := 8 - {ux}^{3}\\
t_1 := 4 - -2 \cdot ux\\
t_2 := ux \cdot t\_1\\
t_3 := -4 \cdot t\_1\\
t_4 := \mathsf{fma}\left(-8, t\_2, -8 \cdot {ux}^{4}\right) - \left(ux \cdot ux\right) \cdot \mathsf{fma}\left(-4, ux, t\_3\right)\\
t_5 := {ux}^{6} + {t\_1}^{3}\\
t_6 := \frac{{ux}^{3} \cdot t\_4}{t\_5}\\
t_7 := {t\_5}^{2}\\
t_8 := \mathsf{fma}\left(4, t\_2, 16 \cdot \left(ux \cdot ux\right)\right)\\
t_9 := \mathsf{fma}\left(4, t\_2, \mathsf{fma}\left(16, ux \cdot ux, 28 \cdot {ux}^{4}\right)\right) - \left(ux \cdot ux\right) \cdot \mathsf{fma}\left(2, ux, \mathsf{fma}\left(6, t\_1, 16 \cdot ux\right)\right)\\
t_10 := {t\_1}^{2}\\
t_11 := \left(ux \cdot ux\right) \cdot t\_1\\
t_12 := \left({ux}^{4} + t\_10\right) - t\_11\\
t_13 := \frac{{ux}^{3} \cdot t\_12}{t\_5}\\
t_14 := \mathsf{fma}\left(6, t\_13, \frac{t\_0 \cdot t\_4}{t\_5}\right)\\
t_15 := ux \cdot t\_10\\
t_16 := \mathsf{fma}\left(2, t\_15, \mathsf{fma}\left(32, t\_11, \mathsf{fma}\left(66, {ux}^{6}, t\_8 \cdot t\_1\right)\right)\right)\\
t_17 := \mathsf{fma}\left(-12, {ux}^{6}, \mathsf{fma}\left(-8, t\_15, -4 \cdot t\_15\right)\right)\\
t_18 := \frac{t\_0 \cdot \left(t\_17 \cdot t\_12\right)}{t\_7}\\
t_19 := t\_14 - t\_18\\
t_20 := \mathsf{fma}\left(-15, t\_13, \mathsf{fma}\left(6, t\_6, \frac{t\_0 \cdot t\_9}{t\_5}\right)\right) - \left(\frac{t\_0 \cdot \left(t\_16 \cdot t\_12\right)}{t\_7} + \frac{t\_17 \cdot t\_19}{t\_5}\right)\\
\sin \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \sqrt{\mathsf{fma}\left(maxCos, \mathsf{fma}\left(maxCos, \mathsf{fma}\left(maxCos, ux \cdot \left(\mathsf{fma}\left(-15, t\_6, \mathsf{fma}\left(6, \frac{{ux}^{3} \cdot t\_9}{t\_5}, \mathsf{fma}\left(20, t\_13, \frac{t\_0 \cdot \left(\mathsf{fma}\left(-56, {ux}^{4}, -16 \cdot \left(ux \cdot ux\right)\right) - \left(ux \cdot ux\right) \cdot \mathsf{fma}\left(-24, ux, \mathsf{fma}\left(-8, ux, t\_3\right)\right)\right)}{t\_5}\right)\right)\right) - \left(\frac{t\_0 \cdot \left(\mathsf{fma}\left(-220, {ux}^{6}, \mathsf{fma}\left(-32, t\_11, -4 \cdot \left(ux \cdot t\_8\right)\right)\right) \cdot t\_12\right)}{t\_7} + \frac{\mathsf{fma}\left(t\_17, t\_20, t\_16 \cdot t\_19\right)}{t\_5}\right)\right), ux \cdot t\_20\right), ux \cdot \left(t\_14 - \left(2 + t\_18\right)\right)\right), \frac{ux \cdot \left(t\_0 \cdot t\_12\right)}{t\_5}\right)}
\end{array}
\end{array}
Initial program 57.8%
Taylor expanded in ux around 0
*-commutativeN/A
lower-*.f32N/A
lower--.f32N/A
+-commutativeN/A
associate-*r*N/A
mul-1-negN/A
lower-fma.f32N/A
lower-neg.f32N/A
lower-pow.f32N/A
lower--.f32N/A
*-commutativeN/A
lower-*.f32 98.4
Applied rewrites98.4%
lift-neg.f32N/A
lift-fma.f32N/A
lift--.f32N/A
lift-pow.f32N/A
flip3-+N/A
lower-/.f32N/A
lower-+.f32N/A
lower-pow.f32N/A
lower-*.f32N/A
lift-neg.f32N/A
lift-pow.f32N/A
lift--.f32N/A
metadata-evalN/A
Applied rewrites98.4%
lift-fma.f32N/A
lift-neg.f32N/A
lift-*.f32N/A
lift--.f32N/A
lift-pow.f32N/A
lift-neg.f32N/A
lift-*.f32N/A
lift--.f32N/A
lift-pow.f32N/A
lift--.f32N/A
lift-*.f32N/A
lift-neg.f32N/A
lift-*.f32N/A
lift--.f32N/A
lift-pow.f32N/A
Applied rewrites98.4%
Taylor expanded in maxCos around 0
Applied rewrites98.4%
Final simplification98.4%
(FPCore (ux uy maxCos)
:precision binary32
(let* ((t_0 (- 2.0 (* 2.0 maxCos))) (t_1 (* ux t_0)))
(*
(sin (* (* uy 2.0) PI))
(fma
(sqrt ux)
(sqrt t_0)
(*
(* ux ux)
(fma
-0.5
(* (sqrt (/ 1.0 t_1)) (pow (- maxCos 1.0) 2.0))
(*
(* ux ux)
(fma
-0.125
(* (sqrt (/ 1.0 (pow t_1 3.0))) (pow (- maxCos 1.0) 4.0))
(*
-0.0625
(*
(sqrt (/ 1.0 (* ux (pow t_0 5.0))))
(pow (- maxCos 1.0) 6.0)))))))))))
/*
 * Series-style alternative: sin(2*pi*uy) multiplied by
 * sqrt(ux) * sqrt(2 - 2*maxCos) plus correction terms in ux^2 that carry
 * (maxCos - 1)^2, ^4 and ^6. No square root of the full radicand is taken.
 */
float code(float ux, float uy, float maxCos) {
	const float span = 2.0f - (2.0f * maxCos);
	const float scaled = ux * span;
	const float ux_sq = ux * ux;
	const float gap = maxCos - 1.0f;
	const float sixth = -0.0625f * (sqrtf((1.0f / (ux * powf(span, 5.0f)))) * powf(gap, 6.0f));
	const float fourth = fmaf(-0.125f, (sqrtf((1.0f / powf(scaled, 3.0f))) * powf(gap, 4.0f)), sixth);
	const float second = fmaf(-0.5f, (sqrtf((1.0f / scaled)) * powf(gap, 2.0f)), (ux_sq * fourth));
	const float amplitude = fmaf(sqrtf(ux), sqrtf(span), (ux_sq * second));
	const float angle = (uy * 2.0f) * ((float) M_PI);
	return sinf(angle) * amplitude;
}
# Series-style alternative: sin(2*pi*uy) multiplied by
# sqrt(ux) * sqrt(t_0) plus correction terms in ux^2 carrying
# (maxCos - 1)^2, ^4 and ^6, where t_0 = 2 - 2*maxCos and t_1 = ux * t_0.
# Statements are placed on separate lines so the definition parses.
function code(ux, uy, maxCos)
    t_0 = Float32(Float32(2.0) - Float32(Float32(2.0) * maxCos))
    t_1 = Float32(ux * t_0)
    return Float32(sin(Float32(Float32(uy * Float32(2.0)) * Float32(pi))) * fma(sqrt(ux), sqrt(t_0), Float32(Float32(ux * ux) * fma(Float32(-0.5), Float32(sqrt(Float32(Float32(1.0) / t_1)) * (Float32(maxCos - Float32(1.0)) ^ Float32(2.0))), Float32(Float32(ux * ux) * fma(Float32(-0.125), Float32(sqrt(Float32(Float32(1.0) / (t_1 ^ Float32(3.0)))) * (Float32(maxCos - Float32(1.0)) ^ Float32(4.0))), Float32(Float32(-0.0625) * Float32(sqrt(Float32(Float32(1.0) / Float32(ux * (t_0 ^ Float32(5.0))))) * (Float32(maxCos - Float32(1.0)) ^ Float32(6.0))))))))))
end
\begin{array}{l}
\\
\begin{array}{l}
t_0 := 2 - 2 \cdot maxCos\\
t_1 := ux \cdot t\_0\\
\sin \left(\left(uy \cdot 2\right) \cdot \pi\right) \cdot \mathsf{fma}\left(\sqrt{ux}, \sqrt{t\_0}, \left(ux \cdot ux\right) \cdot \mathsf{fma}\left(-0.5, \sqrt{\frac{1}{t\_1}} \cdot {\left(maxCos - 1\right)}^{2}, \left(ux \cdot ux\right) \cdot \mathsf{fma}\left(-0.125, \sqrt{\frac{1}{{t\_1}^{3}}} \cdot {\left(maxCos - 1\right)}^{4}, -0.0625 \cdot \left(\sqrt{\frac{1}{ux \cdot {t\_0}^{5}}} \cdot {\left(maxCos - 1\right)}^{6}\right)\right)\right)\right)
\end{array}
\end{array}
Initial program 57.8%
Taylor expanded in ux around 0
*-commutativeN/A
lower-*.f32N/A
lower--.f32N/A
+-commutativeN/A
associate-*r*N/A
mul-1-negN/A
lower-fma.f32N/A
lower-neg.f32N/A
lower-pow.f32N/A
lower--.f32N/A
*-commutativeN/A
lower-*.f32 98.4
Applied rewrites98.4%
Taylor expanded in ux around 0
Applied rewrites95.6%
(FPCore (ux uy maxCos)
:precision binary32
(let* ((t_0 (sin (* PI (* 2.0 uy)))) (t_1 (* (fma -2.0 maxCos 2.0) ux)))
(fma
(fma
(* -0.5 (sqrt (/ (/ 1.0 ux) (fma -2.0 maxCos 2.0))))
(* (pow (- maxCos 1.0) 2.0) t_0)
(*
(fma
(*
(* (pow (- maxCos 1.0) 6.0) t_0)
(sqrt (/ (/ 1.0 ux) (pow (fma -2.0 maxCos 2.0) 5.0))))
-0.0625
(*
(* (sqrt (/ 1.0 (pow t_1 3.0))) (* (pow (- maxCos 1.0) 4.0) t_0))
-0.125))
(* ux ux)))
(* ux ux)
(* t_0 (sqrt t_1)))))
/*
 * Series-style alternative with the sine factor distributed into every
 * term: sine * sqrt(span * ux) plus ux^2-scaled corrections carrying
 * (maxCos - 1)^2, ^4 and ^6, where span = fma(-2, maxCos, 2).
 */
float code(float ux, float uy, float maxCos) {
	const float sine = sinf((((float) M_PI) * (2.0f * uy)));
	const float span = fmaf(-2.0f, maxCos, 2.0f);
	const float scaled = span * ux;
	const float gap = maxCos - 1.0f;
	const float inv_ux = 1.0f / ux;
	const float ux_sq = ux * ux;
	const float high = fmaf(((powf(gap, 6.0f) * sine) * sqrtf((inv_ux / powf(span, 5.0f)))), -0.0625f, ((sqrtf((1.0f / powf(scaled, 3.0f))) * (powf(gap, 4.0f) * sine)) * -0.125f));
	const float correction = fmaf((-0.5f * sqrtf((inv_ux / span))), (powf(gap, 2.0f) * sine), (high * ux_sq));
	return fmaf(correction, ux_sq, (sine * sqrtf(scaled)));
}
# Series-style alternative with the sine factor t_0 distributed into every
# term: t_0 * sqrt(t_1) plus ux^2-scaled corrections carrying
# (maxCos - 1)^2, ^4 and ^6, where t_1 = fma(-2, maxCos, 2) * ux.
# Statements are placed on separate lines so the definition parses.
function code(ux, uy, maxCos)
    t_0 = sin(Float32(Float32(pi) * Float32(Float32(2.0) * uy)))
    t_1 = Float32(fma(Float32(-2.0), maxCos, Float32(2.0)) * ux)
    return fma(fma(Float32(Float32(-0.5) * sqrt(Float32(Float32(Float32(1.0) / ux) / fma(Float32(-2.0), maxCos, Float32(2.0))))), Float32((Float32(maxCos - Float32(1.0)) ^ Float32(2.0)) * t_0), Float32(fma(Float32(Float32((Float32(maxCos - Float32(1.0)) ^ Float32(6.0)) * t_0) * sqrt(Float32(Float32(Float32(1.0) / ux) / (fma(Float32(-2.0), maxCos, Float32(2.0)) ^ Float32(5.0))))), Float32(-0.0625), Float32(Float32(sqrt(Float32(Float32(1.0) / (t_1 ^ Float32(3.0)))) * Float32((Float32(maxCos - Float32(1.0)) ^ Float32(4.0)) * t_0)) * Float32(-0.125))) * Float32(ux * ux))), Float32(ux * ux), Float32(t_0 * sqrt(t_1)))
end
\begin{array}{l}
\\
\begin{array}{l}
t_0 := \sin \left(\pi \cdot \left(2 \cdot uy\right)\right)\\
t_1 := \mathsf{fma}\left(-2, maxCos, 2\right) \cdot ux\\
\mathsf{fma}\left(\mathsf{fma}\left(-0.5 \cdot \sqrt{\frac{\frac{1}{ux}}{\mathsf{fma}\left(-2, maxCos, 2\right)}}, {\left(maxCos - 1\right)}^{2} \cdot t\_0, \mathsf{fma}\left(\left({\left(maxCos - 1\right)}^{6} \cdot t\_0\right) \cdot \sqrt{\frac{\frac{1}{ux}}{{\left(\mathsf{fma}\left(-2, maxCos, 2\right)\right)}^{5}}}, -0.0625, \left(\sqrt{\frac{1}{{t\_1}^{3}}} \cdot \left({\left(maxCos - 1\right)}^{4} \cdot t\_0\right)\right) \cdot -0.125\right) \cdot \left(ux \cdot ux\right)\right), ux \cdot ux, t\_0 \cdot \sqrt{t\_1}\right)
\end{array}
\end{array}
Initial program 57.8%
Taylor expanded in ux around 0
Applied rewrites95.4%
(FPCore (ux uy maxCos)
:precision binary32
(let* ((t_0 (sin (* PI (* 2.0 uy)))))
(fma
(*
(* (pow (- maxCos 1.0) 2.0) t_0)
(sqrt (/ (pow ux 3.0) (fma -2.0 maxCos 2.0))))
-0.5
(* t_0 (* (sqrt (fma -2.0 maxCos 2.0)) (sqrt ux))))))
/*
 * Two-term alternative: sine * (sqrt(span) * sqrt(ux))
 * - 0.5 * (maxCos - 1)^2 * sine * sqrt(ux^3 / span),
 * with span = fma(-2, maxCos, 2) and sine = sin(pi * (2 * uy)).
 */
float code(float ux, float uy, float maxCos) {
	const float sine = sinf((((float) M_PI) * (2.0f * uy)));
	const float span = fmaf(-2.0f, maxCos, 2.0f);
	const float cubic = (powf((maxCos - 1.0f), 2.0f) * sine) * sqrtf((powf(ux, 3.0f) / span));
	const float leading = sine * (sqrtf(span) * sqrtf(ux));
	return fmaf(cubic, -0.5f, leading);
}
# Two-term alternative: t_0 * (sqrt(fma(-2, maxCos, 2)) * sqrt(ux))
# - 0.5 * (maxCos - 1)^2 * t_0 * sqrt(ux^3 / fma(-2, maxCos, 2)),
# with t_0 = sin(pi * (2 * uy)).
# Statements are placed on separate lines so the definition parses.
function code(ux, uy, maxCos)
    t_0 = sin(Float32(Float32(pi) * Float32(Float32(2.0) * uy)))
    return fma(Float32(Float32((Float32(maxCos - Float32(1.0)) ^ Float32(2.0)) * t_0) * sqrt(Float32((ux ^ Float32(3.0)) / fma(Float32(-2.0), maxCos, Float32(2.0))))), Float32(-0.5), Float32(t_0 * Float32(sqrt(fma(Float32(-2.0), maxCos, Float32(2.0))) * sqrt(ux))))
end
\begin{array}{l}
\\
\begin{array}{l}
t_0 := \sin \left(\pi \cdot \left(2 \cdot uy\right)\right)\\
\mathsf{fma}\left(\left({\left(maxCos - 1\right)}^{2} \cdot t\_0\right) \cdot \sqrt{\frac{{ux}^{3}}{\mathsf{fma}\left(-2, maxCos, 2\right)}}, -0.5, t\_0 \cdot \left(\sqrt{\mathsf{fma}\left(-2, maxCos, 2\right)} \cdot \sqrt{ux}\right)\right)
\end{array}
\end{array}
Initial program 57.8%
Taylor expanded in ux around 0
*-commutativeN/A
lower-fma.f32N/A
Applied rewrites90.1%
lift-sqrt.f32N/A
lift-*.f32N/A
lift-fma.f32N/A
sqrt-prodN/A
lower-*.f32N/A
lower-sqrt.f32N/A
lift-fma.f32N/A
lower-sqrt.f32 90.2
Applied rewrites90.2%
(FPCore (ux uy maxCos)
:precision binary32
(let* ((t_0 (sin (* PI (* 2.0 uy)))))
(fma
(*
(* (pow (- maxCos 1.0) 2.0) t_0)
(sqrt (/ (pow ux 3.0) (fma -2.0 maxCos 2.0))))
-0.5
(* t_0 (sqrt (* (fma -2.0 maxCos 2.0) ux))))))
/*
 * Two-term alternative: sine * sqrt(span * ux)
 * - 0.5 * (maxCos - 1)^2 * sine * sqrt(ux^3 / span),
 * with span = fma(-2, maxCos, 2) and sine = sin(pi * (2 * uy)).
 */
float code(float ux, float uy, float maxCos) {
	const float sine = sinf((((float) M_PI) * (2.0f * uy)));
	const float span = fmaf(-2.0f, maxCos, 2.0f);
	const float cubic = (powf((maxCos - 1.0f), 2.0f) * sine) * sqrtf((powf(ux, 3.0f) / span));
	const float leading = sine * sqrtf((span * ux));
	return fmaf(cubic, -0.5f, leading);
}
# Two-term alternative: t_0 * sqrt(fma(-2, maxCos, 2) * ux)
# - 0.5 * (maxCos - 1)^2 * t_0 * sqrt(ux^3 / fma(-2, maxCos, 2)),
# with t_0 = sin(pi * (2 * uy)).
# Statements are placed on separate lines so the definition parses.
function code(ux, uy, maxCos)
    t_0 = sin(Float32(Float32(pi) * Float32(Float32(2.0) * uy)))
    return fma(Float32(Float32((Float32(maxCos - Float32(1.0)) ^ Float32(2.0)) * t_0) * sqrt(Float32((ux ^ Float32(3.0)) / fma(Float32(-2.0), maxCos, Float32(2.0))))), Float32(-0.5), Float32(t_0 * sqrt(Float32(fma(Float32(-2.0), maxCos, Float32(2.0)) * ux))))
end
\begin{array}{l}
\\
\begin{array}{l}
t_0 := \sin \left(\pi \cdot \left(2 \cdot uy\right)\right)\\
\mathsf{fma}\left(\left({\left(maxCos - 1\right)}^{2} \cdot t\_0\right) \cdot \sqrt{\frac{{ux}^{3}}{\mathsf{fma}\left(-2, maxCos, 2\right)}}, -0.5, t\_0 \cdot \sqrt{\mathsf{fma}\left(-2, maxCos, 2\right) \cdot ux}\right)
\end{array}
\end{array}
Initial program 57.8%
Taylor expanded in ux around 0
*-commutativeN/A
lower-fma.f32N/A
Applied rewrites90.1%
(FPCore (ux uy maxCos)
:precision binary32
(let* ((t_0 (sin (* PI (* 2.0 uy)))))
(fma
(*
(* (pow (- maxCos 1.0) 2.0) t_0)
(sqrt (/ (pow ux 3.0) (fma -2.0 maxCos 2.0))))
-0.5
(* t_0 (sqrt (* (* maxCos (- (* 2.0 (/ 1.0 maxCos)) 2.0)) ux))))))
/*
 * Two-term alternative in which the leading radicand is written as
 * (maxCos * (2 * (1 / maxCos) - 2)) * ux. The expression divides by maxCos,
 * so maxCos == 0 produces a non-finite intermediate.
 */
float code(float ux, float uy, float maxCos) {
	const float sine = sinf((((float) M_PI) * (2.0f * uy)));
	const float cubic = (powf((maxCos - 1.0f), 2.0f) * sine) * sqrtf((powf(ux, 3.0f) / fmaf(-2.0f, maxCos, 2.0f)));
	const float span_alt = maxCos * ((2.0f * (1.0f / maxCos)) - 2.0f);
	const float leading = sine * sqrtf((span_alt * ux));
	return fmaf(cubic, -0.5f, leading);
}
# Two-term alternative in which the leading radicand is written as
# (maxCos * (2 * (1 / maxCos) - 2)) * ux. The expression divides by maxCos.
# Statements are placed on separate lines so the definition parses.
function code(ux, uy, maxCos)
    t_0 = sin(Float32(Float32(pi) * Float32(Float32(2.0) * uy)))
    return fma(Float32(Float32((Float32(maxCos - Float32(1.0)) ^ Float32(2.0)) * t_0) * sqrt(Float32((ux ^ Float32(3.0)) / fma(Float32(-2.0), maxCos, Float32(2.0))))), Float32(-0.5), Float32(t_0 * sqrt(Float32(Float32(maxCos * Float32(Float32(Float32(2.0) * Float32(Float32(1.0) / maxCos)) - Float32(2.0))) * ux))))
end
\begin{array}{l}
\\
\begin{array}{l}
t_0 := \sin \left(\pi \cdot \left(2 \cdot uy\right)\right)\\
\mathsf{fma}\left(\left({\left(maxCos - 1\right)}^{2} \cdot t\_0\right) \cdot \sqrt{\frac{{ux}^{3}}{\mathsf{fma}\left(-2, maxCos, 2\right)}}, -0.5, t\_0 \cdot \sqrt{\left(maxCos \cdot \left(2 \cdot \frac{1}{maxCos} - 2\right)\right) \cdot ux}\right)
\end{array}
\end{array}
Initial program 57.8%
Taylor expanded in ux around 0
*-commutativeN/A
lower-fma.f32N/A
Applied rewrites90.1%
Taylor expanded in maxCos around inf
lower-*.f32N/A
lower--.f32N/A
lower-*.f32N/A
lower-/.f32 90.1
Applied rewrites90.1%
(FPCore (ux uy maxCos)
:precision binary32
(let* ((t_0 (sin (* PI (* 2.0 uy))))
(t_1
(*
(*
(* (pow (- maxCos 1.0) 2.0) t_0)
(sqrt (/ (pow ux 3.0) (fma -2.0 maxCos 2.0))))
-0.5))
(t_2 (* t_0 (sqrt (* (fma -2.0 maxCos 2.0) ux)))))
(/ (- (* t_1 t_1) (* t_2 t_2)) (- t_1 t_2))))
/*
 * Two-term alternative where the sum a + b is evaluated as
 * (a*a - b*b) / (a - b), with
 *   a = -0.5 * (maxCos - 1)^2 * sine * sqrt(ux^3 / fma(-2, maxCos, 2))
 *   b = sine * sqrt(fma(-2, maxCos, 2) * ux)
 * The quotient is undefined when a == b.
 */
float code(float ux, float uy, float maxCos) {
	const float sine = sinf((((float) M_PI) * (2.0f * uy)));
	const float a = ((powf((maxCos - 1.0f), 2.0f) * sine) * sqrtf((powf(ux, 3.0f) / fmaf(-2.0f, maxCos, 2.0f)))) * -0.5f;
	const float b = sine * sqrtf((fmaf(-2.0f, maxCos, 2.0f) * ux));
	const float numerator = (a * a) - (b * b);
	return numerator / (a - b);
}
# Two-term alternative where the sum t_1 + t_2 is evaluated as
# (t_1^2 - t_2^2) / (t_1 - t_2); t_1 is the -0.5-scaled cubic term and
# t_2 = t_0 * sqrt(fma(-2, maxCos, 2) * ux), with t_0 = sin(pi * (2 * uy)).
# Statements are placed on separate lines so the definition parses.
function code(ux, uy, maxCos)
    t_0 = sin(Float32(Float32(pi) * Float32(Float32(2.0) * uy)))
    t_1 = Float32(Float32(Float32((Float32(maxCos - Float32(1.0)) ^ Float32(2.0)) * t_0) * sqrt(Float32((ux ^ Float32(3.0)) / fma(Float32(-2.0), maxCos, Float32(2.0))))) * Float32(-0.5))
    t_2 = Float32(t_0 * sqrt(Float32(fma(Float32(-2.0), maxCos, Float32(2.0)) * ux)))
    return Float32(Float32(Float32(t_1 * t_1) - Float32(t_2 * t_2)) / Float32(t_1 - t_2))
end
\begin{array}{l}
\\
\begin{array}{l}
t_0 := \sin \left(\pi \cdot \left(2 \cdot uy\right)\right)\\
t_1 := \left(\left({\left(maxCos - 1\right)}^{2} \cdot t\_0\right) \cdot \sqrt{\frac{{ux}^{3}}{\mathsf{fma}\left(-2, maxCos, 2\right)}}\right) \cdot -0.5\\
t_2 := t\_0 \cdot \sqrt{\mathsf{fma}\left(-2, maxCos, 2\right) \cdot ux}\\
\frac{t\_1 \cdot t\_1 - t\_2 \cdot t\_2}{t\_1 - t\_2}
\end{array}
\end{array}
Initial program 57.8%
Taylor expanded in ux around 0
*-commutativeN/A
lower-fma.f32N/A
Applied rewrites90.1%
Applied rewrites90.1%
(FPCore (ux uy maxCos)
:precision binary32
(let* ((t_0 (sin (* 2.0 (* uy PI)))) (t_1 (+ 2.0 (* -2.0 maxCos))))
(*
(* ux ux)
(/
(fma
-0.5
(* (sqrt (/ (pow ux 3.0) t_1)) (* t_0 (pow (- maxCos 1.0) 2.0)))
(* (sqrt (* ux t_1)) t_0))
(* ux ux)))))
/*
 * Two-term alternative that divides the fma sum by ux*ux and then
 * multiplies by ux*ux again. span = 2 + (-2 * maxCos) and
 * sine = sin(2 * (uy * pi)).
 */
float code(float ux, float uy, float maxCos) {
	const float sine = sinf((2.0f * (uy * ((float) M_PI))));
	const float span = 2.0f + (-2.0f * maxCos);
	const float ux_sq = ux * ux;
	const float cubic = sqrtf((powf(ux, 3.0f) / span)) * (sine * powf((maxCos - 1.0f), 2.0f));
	const float leading = sqrtf((ux * span)) * sine;
	return ux_sq * (fmaf(-0.5f, cubic, leading) / ux_sq);
}
# Two-term alternative that divides the fma sum by ux*ux and then multiplies
# by ux*ux again; t_0 = sin(2 * (uy * pi)) and t_1 = 2 + (-2 * maxCos).
# Statements are placed on separate lines so the definition parses.
function code(ux, uy, maxCos)
    t_0 = sin(Float32(Float32(2.0) * Float32(uy * Float32(pi))))
    t_1 = Float32(Float32(2.0) + Float32(Float32(-2.0) * maxCos))
    return Float32(Float32(ux * ux) * Float32(fma(Float32(-0.5), Float32(sqrt(Float32((ux ^ Float32(3.0)) / t_1)) * Float32(t_0 * (Float32(maxCos - Float32(1.0)) ^ Float32(2.0)))), Float32(sqrt(Float32(ux * t_1)) * t_0)) / Float32(ux * ux)))
end
\begin{array}{l}
\\
\begin{array}{l}
t_0 := \sin \left(2 \cdot \left(uy \cdot \pi\right)\right)\\
t_1 := 2 + -2 \cdot maxCos\\
\left(ux \cdot ux\right) \cdot \frac{\mathsf{fma}\left(-0.5, \sqrt{\frac{{ux}^{3}}{t\_1}} \cdot \left(t\_0 \cdot {\left(maxCos - 1\right)}^{2}\right), \sqrt{ux \cdot t\_1} \cdot t\_0\right)}{ux \cdot ux}
\end{array}
\end{array}
Initial program 57.8%
Taylor expanded in ux around 0
*-commutativeN/A
lower-fma.f32N/A
Applied rewrites90.1%
Taylor expanded in ux around inf
lower-*.f32N/A
pow2N/A
lift-*.f32N/A
lower-fma.f32N/A
Applied rewrites89.9%
Taylor expanded in ux around 0
lower-/.f32N/A
Applied rewrites90.0%
(FPCore (ux uy maxCos)
:precision binary32
(let* ((t_0 (+ 2.0 (* -2.0 maxCos))) (t_1 (sin (* 2.0 (* uy PI)))))
(*
(exp (* (log ux) 2.0))
(fma
-0.5
(* (sqrt (/ 1.0 (* ux t_0))) (* t_1 (pow (- maxCos 1.0) 2.0)))
(* (sqrt (/ t_0 (pow ux 3.0))) t_1)))))
/*
 * Two-term alternative that forms ux^2 as exp(log(ux) * 2) and multiplies
 * it by fma(-0.5, sqrt(1 / (ux * span)) * (sine * (maxCos - 1)^2),
 * sqrt(span / ux^3) * sine), with span = 2 + (-2 * maxCos).
 */
float code(float ux, float uy, float maxCos) {
	const float span = 2.0f + (-2.0f * maxCos);
	const float sine = sinf((2.0f * (uy * ((float) M_PI))));
	const float ux_sq = expf((logf(ux) * 2.0f));
	const float cubic = sqrtf((1.0f / (ux * span))) * (sine * powf((maxCos - 1.0f), 2.0f));
	const float leading = sqrtf((span / powf(ux, 3.0f))) * sine;
	return ux_sq * fmaf(-0.5f, cubic, leading);
}
# Two-term alternative that forms ux^2 as exp(log(ux) * 2) and multiplies it
# by an fma of the two scaled terms; t_0 = 2 + (-2 * maxCos) and
# t_1 = sin(2 * (uy * pi)).
# Statements are placed on separate lines so the definition parses.
function code(ux, uy, maxCos)
    t_0 = Float32(Float32(2.0) + Float32(Float32(-2.0) * maxCos))
    t_1 = sin(Float32(Float32(2.0) * Float32(uy * Float32(pi))))
    return Float32(exp(Float32(log(ux) * Float32(2.0))) * fma(Float32(-0.5), Float32(sqrt(Float32(Float32(1.0) / Float32(ux * t_0))) * Float32(t_1 * (Float32(maxCos - Float32(1.0)) ^ Float32(2.0)))), Float32(sqrt(Float32(t_0 / (ux ^ Float32(3.0)))) * t_1)))
end
\begin{array}{l}
\\
\begin{array}{l}
t_0 := 2 + -2 \cdot maxCos\\
t_1 := \sin \left(2 \cdot \left(uy \cdot \pi\right)\right)\\
e^{\log ux \cdot 2} \cdot \mathsf{fma}\left(-0.5, \sqrt{\frac{1}{ux \cdot t\_0}} \cdot \left(t\_1 \cdot {\left(maxCos - 1\right)}^{2}\right), \sqrt{\frac{t\_0}{{ux}^{3}}} \cdot t\_1\right)
\end{array}
\end{array}
Initial program 57.8%
Taylor expanded in ux around 0
*-commutativeN/A
lower-fma.f32N/A
Applied rewrites90.1%
Taylor expanded in ux around inf
lower-*.f32N/A
pow2N/A
lift-*.f32N/A
lower-fma.f32N/A
Applied rewrites89.9%
lift-*.f32N/A
pow2N/A
pow-to-expN/A
lower-exp.f32N/A
lower-*.f32N/A
lower-log.f32 85.1
Applied rewrites85.1%
herbie shell --seed 2025057
; Benchmark input: sin(2*pi*uy) * sqrt(1 - t*t) with t = (1 - ux) + ux * maxCos,
; evaluated in binary32.
; The precondition restricts ux and uy to [2.328306437e-10, 1] and
; maxCos to [0, 1].
(FPCore (ux uy maxCos)
:name "UniformSampleCone, y"
:precision binary32
:pre (and (and (and (<= 2.328306437e-10 ux) (<= ux 1.0)) (and (<= 2.328306437e-10 uy) (<= uy 1.0))) (and (<= 0.0 maxCos) (<= maxCos 1.0)))
(* (sin (* (* uy 2.0) PI)) (sqrt (- 1.0 (* (+ (- 1.0 ux) (* ux maxCos)) (+ (- 1.0 ux) (* ux maxCos)))))))