
; Input expression in binary32: sqrt(-log(1 - u1)) * sin((2*PI) * u2).
; cosTheta_i is declared as an argument but does not appear in the body.
(FPCore (cosTheta_i u1 u2) :precision binary32 (* (sqrt (- (log (- 1.0 u1)))) (sin (* (* 2.0 PI) u2))))
/*
 * Input program in single precision:
 *   sqrt(-log(1 - u1)) * sin((2*pi) * u2)
 * cosTheta_i is part of the signature but is not read.
 */
float code(float cosTheta_i, float u1, float u2) {
	const float two_pi = 2.0f * ((float) M_PI);
	const float radial = sqrtf(-logf(1.0f - u1));
	const float angular = sinf(two_pi * u2);
	return radial * angular;
}
# Input program: sqrt(-log(1 - u1)) * sin((2*pi) * u2), with every
# intermediate rounded to Float32. cosTheta_i is accepted but not used.
function code(cosTheta_i, u1, u2)
    one_minus_u1 = Float32(Float32(1.0) - u1)
    radial = sqrt(Float32(-log(one_minus_u1)))
    phase = Float32(Float32(Float32(2.0) * Float32(pi)) * u2)
    return Float32(radial * sin(phase))
end
function tmp = code(cosTheta_i, u1, u2)
	% Input program: sqrt(-log(1 - u1)) * sin((2*pi) * u2) using single
	% constants. cosTheta_i is accepted but not used.
	radial = sqrt(-log((single(1.0) - u1)));
	angular = sin(((single(2.0) * single(pi)) * u2));
	tmp = radial * angular;
end
\begin{array}{l}
\\
\sqrt{-\log \left(1 - u1\right)} \cdot \sin \left(\left(2 \cdot \pi\right) \cdot u2\right)
\end{array}
Sampling outcomes in binary32 precision:
Herbie found 16 alternatives:
| Alternative | Accuracy | Speedup |
|---|---|---|
; First listed alternative: identical to the input expression,
; sqrt(-log(1 - u1)) * sin((2*PI) * u2).
(FPCore (cosTheta_i u1 u2) :precision binary32 (* (sqrt (- (log (- 1.0 u1)))) (sin (* (* 2.0 PI) u2))))
/*
 * First listed alternative (same as the input program):
 *   sqrt(-log(1 - u1)) * sin((2*pi) * u2)
 * cosTheta_i is not read.
 */
float code(float cosTheta_i, float u1, float u2) {
	const float phase = (2.0f * ((float) M_PI)) * u2;
	const float neg_log = -logf(1.0f - u1);
	return sqrtf(neg_log) * sinf(phase);
}
# First listed alternative (same as the input program):
# sqrt(-log(1 - u1)) * sin((2*pi) * u2), rounded to Float32 at each step.
function code(cosTheta_i, u1, u2)
    phase = Float32(Float32(Float32(2.0) * Float32(pi)) * u2)
    neg_log = Float32(-log(Float32(Float32(1.0) - u1)))
    return Float32(sqrt(neg_log) * sin(phase))
end
function tmp = code(cosTheta_i, u1, u2)
	% First listed alternative (same as the input program):
	% sqrt(-log(1 - u1)) * sin((2*pi) * u2). cosTheta_i is not used.
	phase = ((single(2.0) * single(pi)) * u2);
	neg_log = -log((single(1.0) - u1));
	tmp = sqrt(neg_log) * sin(phase);
end
\begin{array}{l}
\\
\sqrt{-\log \left(1 - u1\right)} \cdot \sin \left(\left(2 \cdot \pi\right) \cdot u2\right)
\end{array}
; Alternative using log1p(-u1) in place of log(1 - u1); the sine factor is
; unchanged. The derivation below reports 57.4% -> 98.4% accuracy.
(FPCore (cosTheta_i u1 u2) :precision binary32 (* (sqrt (- (log1p (- u1)))) (sin (* (* 2.0 PI) u2))))
/*
 * Alternative that evaluates the logarithm as log1p(-u1) instead of
 * log(1 - u1):  sqrt(-log1p(-u1)) * sin((2*pi) * u2).
 * cosTheta_i is not read.
 */
float code(float cosTheta_i, float u1, float u2) {
	const float two_pi = 2.0f * ((float) M_PI);
	const float radial = sqrtf(-log1pf(-u1));
	return radial * sinf(two_pi * u2);
}
# Alternative that evaluates the logarithm as log1p(-u1):
# sqrt(-log1p(-u1)) * sin((2*pi) * u2), rounded to Float32 at each step.
function code(cosTheta_i, u1, u2)
    radial = sqrt(Float32(-log1p(Float32(-u1))))
    phase = Float32(Float32(Float32(2.0) * Float32(pi)) * u2)
    return Float32(radial * sin(phase))
end
\begin{array}{l}
\\
\sqrt{-\mathsf{log1p}\left(-u1\right)} \cdot \sin \left(\left(2 \cdot \pi\right) \cdot u2\right)
\end{array}
Initial program 57.4%
sub-negN/A
accelerator-lowering-log1p.f32N/A
neg-lowering-neg.f3298.4
Applied egg-rr98.4%
; Two-regime alternative split on -log(1 - u1) <= 0.07199999690055847:
;  - at or below the threshold: sin((2*PI)*u2) times the square root of a
;    rational form built from the polynomial t_0 in u1;
;  - above it: sqrt(-log1p(-u1)) times a cubic series in u2 for the sine.
(FPCore (cosTheta_i u1 u2)
:precision binary32
(let* ((t_0 (fma u1 (fma u1 -0.25 -0.3333333333333333) -0.5)))
(if (<= (- (log (- 1.0 u1))) 0.07199999690055847)
(*
(sin (* (* 2.0 PI) u2))
(sqrt (/ (* (- u1) (fma t_0 (* t_0 (* u1 u1)) -1.0)) (fma u1 t_0 1.0))))
(*
(sqrt (- (log1p (- u1))))
(*
u2
(fma (* -1.3333333333333333 (* u2 u2)) (* PI (* PI PI)) (* 2.0 PI)))))))
/*
 * Two-regime alternative, split on -log(1 - u1) <= 0.07199999690055847.
 *   At or below: sin((2*pi)*u2) * sqrt(num / den), where num and den are
 *                built from the polynomial `poly` in u1.
 *   Above:       sqrt(-log1p(-u1)) * u2 * (2*pi - (4/3)*pi^3*u2^2).
 * cosTheta_i is not read.
 */
float code(float cosTheta_i, float u1, float u2) {
	const float pi = (float) M_PI;
	const float poly = fmaf(u1, fmaf(u1, -0.25f, -0.3333333333333333f), -0.5f);
	if (-logf(1.0f - u1) <= 0.07199999690055847f) {
		const float num = -u1 * fmaf(poly, poly * (u1 * u1), -1.0f);
		const float den = fmaf(u1, poly, 1.0f);
		return sinf((2.0f * pi) * u2) * sqrtf(num / den);
	}
	const float sin_series = u2 * fmaf(-1.3333333333333333f * (u2 * u2), pi * (pi * pi), 2.0f * pi);
	return sqrtf(-log1pf(-u1)) * sin_series;
}
# Two-regime alternative, split on -log(1 - u1) <= 0.07199999690055847.
#   At or below: sin((2*pi)*u2) * sqrt(num / den), built from the polynomial t_0.
#   Above:       sqrt(-log1p(-u1)) * u2 * fma(-4/3 * u2^2, pi^3, 2*pi).
# cosTheta_i is accepted but not used. Statements are placed on separate
# lines; the collapsed one-line form had no separators and did not parse.
function code(cosTheta_i, u1, u2)
    t_0 = fma(u1, fma(u1, Float32(-0.25), Float32(-0.3333333333333333)), Float32(-0.5))
    if Float32(-log(Float32(Float32(1.0) - u1))) <= Float32(0.07199999690055847)
        num = Float32(Float32(-u1) * fma(t_0, Float32(t_0 * Float32(u1 * u1)), Float32(-1.0)))
        den = fma(u1, t_0, Float32(1.0))
        return Float32(sin(Float32(Float32(Float32(2.0) * Float32(pi)) * u2)) * sqrt(Float32(num / den)))
    else
        sin_series = Float32(u2 * fma(Float32(Float32(-1.3333333333333333) * Float32(u2 * u2)), Float32(Float32(pi) * Float32(Float32(pi) * Float32(pi))), Float32(Float32(2.0) * Float32(pi))))
        return Float32(sqrt(Float32(-log1p(Float32(-u1)))) * sin_series)
    end
end
\begin{array}{l}
\\
\begin{array}{l}
t_0 := \mathsf{fma}\left(u1, \mathsf{fma}\left(u1, -0.25, -0.3333333333333333\right), -0.5\right)\\
\mathbf{if}\;-\log \left(1 - u1\right) \leq 0.07199999690055847:\\
\;\;\;\;\sin \left(\left(2 \cdot \pi\right) \cdot u2\right) \cdot \sqrt{\frac{\left(-u1\right) \cdot \mathsf{fma}\left(t\_0, t\_0 \cdot \left(u1 \cdot u1\right), -1\right)}{\mathsf{fma}\left(u1, t\_0, 1\right)}}\\
\mathbf{else}:\\
\;\;\;\;\sqrt{-\mathsf{log1p}\left(-u1\right)} \cdot \left(u2 \cdot \mathsf{fma}\left(-1.3333333333333333 \cdot \left(u2 \cdot u2\right), \pi \cdot \left(\pi \cdot \pi\right), 2 \cdot \pi\right)\right)\\
\end{array}
\end{array}
if (neg.f32 (log.f32 (-.f32 #s(literal 1 binary32) u1))) < 0.0719999969
Initial program 50.8%
Taylor expanded in u1 around 0
*-lowering-*.f32N/A
sub-negN/A
metadata-evalN/A
accelerator-lowering-fma.f32N/A
sub-negN/A
metadata-evalN/A
accelerator-lowering-fma.f32N/A
sub-negN/A
*-commutativeN/A
metadata-evalN/A
accelerator-lowering-fma.f3298.1
Simplified98.1%
distribute-lft-neg-inN/A
flip-+N/A
associate-*r/N/A
/-lowering-/.f32N/A
Applied egg-rr98.2%
if 0.0719999969 < (neg.f32 (log.f32 (-.f32 #s(literal 1 binary32) u1)))
Initial program 97.8%
sub-negN/A
accelerator-lowering-log1p.f32N/A
neg-lowering-neg.f3298.6
Applied egg-rr98.6%
Taylor expanded in u2 around 0
*-lowering-*.f32N/A
associate-*r*N/A
accelerator-lowering-fma.f32N/A
*-lowering-*.f32N/A
unpow2N/A
*-lowering-*.f32N/A
cube-multN/A
*-lowering-*.f32N/A
PI-lowering-PI.f32N/A
*-lowering-*.f32N/A
PI-lowering-PI.f32N/A
PI-lowering-PI.f32N/A
*-lowering-*.f32N/A
PI-lowering-PI.f3296.7
Simplified96.7%
Final simplification98.0%
; Two-regime alternative split on -log(1 - u1) <= 0.07199999690055847:
;  - at or below: sin((2*PI)*u2) * sqrt(-u1 * P(u1)), P a cubic in u1 (Horner fma form);
;  - above: sqrt(-log1p(-u1)) * (2 * (PI * u2)), i.e. sin replaced by its argument.
(FPCore (cosTheta_i u1 u2)
:precision binary32
(if (<= (- (log (- 1.0 u1))) 0.07199999690055847)
(*
(sin (* (* 2.0 PI) u2))
(sqrt
(*
(- u1)
(fma u1 (fma u1 (fma u1 -0.25 -0.3333333333333333) -0.5) -1.0))))
(* (sqrt (- (log1p (- u1)))) (* 2.0 (* PI u2)))))
/*
 * Two-regime alternative, split on -log(1 - u1) <= 0.07199999690055847.
 *   At or below: sin((2*pi)*u2) * sqrt(-u1 * P(u1)), P a cubic in Horner form.
 *   Above:       sqrt(-log1p(-u1)) * (2 * (pi * u2)).
 * cosTheta_i is not read.
 */
float code(float cosTheta_i, float u1, float u2) {
	const float pi = (float) M_PI;
	if (-logf(1.0f - u1) <= 0.07199999690055847f) {
		const float cubic = fmaf(u1, fmaf(u1, fmaf(u1, -0.25f, -0.3333333333333333f), -0.5f), -1.0f);
		return sinf((2.0f * pi) * u2) * sqrtf(-u1 * cubic);
	}
	return sqrtf(-log1pf(-u1)) * (2.0f * (pi * u2));
}
# Two-regime alternative, split on -log(1 - u1) <= 0.07199999690055847.
#   At or below: sin((2*pi)*u2) * sqrt(-u1 * P(u1)), P a cubic in Horner form.
#   Above:       sqrt(-log1p(-u1)) * (2 * (pi * u2)).
# cosTheta_i is accepted but not used. Statements are placed on separate
# lines; the collapsed one-line form had no separators and did not parse.
function code(cosTheta_i, u1, u2)
    if Float32(-log(Float32(Float32(1.0) - u1))) <= Float32(0.07199999690055847)
        cubic = fma(u1, fma(u1, fma(u1, Float32(-0.25), Float32(-0.3333333333333333)), Float32(-0.5)), Float32(-1.0))
        return Float32(sin(Float32(Float32(Float32(2.0) * Float32(pi)) * u2)) * sqrt(Float32(Float32(-u1) * cubic)))
    else
        return Float32(sqrt(Float32(-log1p(Float32(-u1)))) * Float32(Float32(2.0) * Float32(Float32(pi) * u2)))
    end
end
\begin{array}{l}
\\
\begin{array}{l}
\mathbf{if}\;-\log \left(1 - u1\right) \leq 0.07199999690055847:\\
\;\;\;\;\sin \left(\left(2 \cdot \pi\right) \cdot u2\right) \cdot \sqrt{\left(-u1\right) \cdot \mathsf{fma}\left(u1, \mathsf{fma}\left(u1, \mathsf{fma}\left(u1, -0.25, -0.3333333333333333\right), -0.5\right), -1\right)}\\
\mathbf{else}:\\
\;\;\;\;\sqrt{-\mathsf{log1p}\left(-u1\right)} \cdot \left(2 \cdot \left(\pi \cdot u2\right)\right)\\
\end{array}
\end{array}
if (neg.f32 (log.f32 (-.f32 #s(literal 1 binary32) u1))) < 0.0719999969
Initial program 50.8%
Taylor expanded in u1 around 0
*-lowering-*.f32N/A
sub-negN/A
metadata-evalN/A
accelerator-lowering-fma.f32N/A
sub-negN/A
metadata-evalN/A
accelerator-lowering-fma.f32N/A
sub-negN/A
*-commutativeN/A
metadata-evalN/A
accelerator-lowering-fma.f3298.1
Simplified98.1%
if 0.0719999969 < (neg.f32 (log.f32 (-.f32 #s(literal 1 binary32) u1)))
Initial program 97.8%
sub-negN/A
accelerator-lowering-log1p.f32N/A
neg-lowering-neg.f3298.6
Applied egg-rr98.6%
Taylor expanded in u2 around 0
*-lowering-*.f32N/A
*-lowering-*.f32N/A
PI-lowering-PI.f3289.1
Simplified89.1%
Final simplification96.9%
; Two-regime alternative split on -log(1 - u1) <= 0.07199999690055847:
;  - at or below: sin((2*PI)*u2) * sqrt(fma(u1*u1, Q(u1), u1)), Q quadratic with positive coefficients;
;  - above: sqrt(-log1p(-u1)) * (2 * (PI * u2)).
(FPCore (cosTheta_i u1 u2)
:precision binary32
(if (<= (- (log (- 1.0 u1))) 0.07199999690055847)
(*
(sin (* (* 2.0 PI) u2))
(sqrt (fma (* u1 u1) (fma u1 (fma u1 0.25 0.3333333333333333) 0.5) u1)))
(* (sqrt (- (log1p (- u1)))) (* 2.0 (* PI u2)))))
/*
 * Two-regime alternative, split on -log(1 - u1) <= 0.07199999690055847.
 *   At or below: sin((2*pi)*u2) * sqrt(u1 + u1^2 * Q(u1)), Q quadratic in Horner form.
 *   Above:       sqrt(-log1p(-u1)) * (2 * (pi * u2)).
 * cosTheta_i is not read.
 */
float code(float cosTheta_i, float u1, float u2) {
	const float pi = (float) M_PI;
	if (-logf(1.0f - u1) <= 0.07199999690055847f) {
		const float quad = fmaf(u1, fmaf(u1, 0.25f, 0.3333333333333333f), 0.5f);
		const float radicand = fmaf(u1 * u1, quad, u1);
		return sinf((2.0f * pi) * u2) * sqrtf(radicand);
	}
	return sqrtf(-log1pf(-u1)) * (2.0f * (pi * u2));
}
# Two-regime alternative, split on -log(1 - u1) <= 0.07199999690055847.
#   At or below: sin((2*pi)*u2) * sqrt(fma(u1*u1, Q(u1), u1)), Q quadratic in Horner form.
#   Above:       sqrt(-log1p(-u1)) * (2 * (pi * u2)).
# cosTheta_i is accepted but not used. Statements are placed on separate
# lines; the collapsed one-line form had no separators and did not parse.
function code(cosTheta_i, u1, u2)
    if Float32(-log(Float32(Float32(1.0) - u1))) <= Float32(0.07199999690055847)
        quad = fma(u1, fma(u1, Float32(0.25), Float32(0.3333333333333333)), Float32(0.5))
        radicand = fma(Float32(u1 * u1), quad, u1)
        return Float32(sin(Float32(Float32(Float32(2.0) * Float32(pi)) * u2)) * sqrt(radicand))
    else
        return Float32(sqrt(Float32(-log1p(Float32(-u1)))) * Float32(Float32(2.0) * Float32(Float32(pi) * u2)))
    end
end
\begin{array}{l}
\\
\begin{array}{l}
\mathbf{if}\;-\log \left(1 - u1\right) \leq 0.07199999690055847:\\
\;\;\;\;\sin \left(\left(2 \cdot \pi\right) \cdot u2\right) \cdot \sqrt{\mathsf{fma}\left(u1 \cdot u1, \mathsf{fma}\left(u1, \mathsf{fma}\left(u1, 0.25, 0.3333333333333333\right), 0.5\right), u1\right)}\\
\mathbf{else}:\\
\;\;\;\;\sqrt{-\mathsf{log1p}\left(-u1\right)} \cdot \left(2 \cdot \left(\pi \cdot u2\right)\right)\\
\end{array}
\end{array}
if (neg.f32 (log.f32 (-.f32 #s(literal 1 binary32) u1))) < 0.0719999969
Initial program 50.8%
Taylor expanded in u1 around 0
+-commutativeN/A
distribute-lft-inN/A
associate-*r*N/A
unpow2N/A
*-rgt-identityN/A
accelerator-lowering-fma.f32N/A
unpow2N/A
*-lowering-*.f32N/A
+-commutativeN/A
accelerator-lowering-fma.f32N/A
+-commutativeN/A
*-commutativeN/A
accelerator-lowering-fma.f3298.1
Simplified98.1%
if 0.0719999969 < (neg.f32 (log.f32 (-.f32 #s(literal 1 binary32) u1)))
Initial program 97.8%
sub-negN/A
accelerator-lowering-log1p.f32N/A
neg-lowering-neg.f3298.6
Applied egg-rr98.6%
Taylor expanded in u2 around 0
*-lowering-*.f32N/A
*-lowering-*.f32N/A
PI-lowering-PI.f3289.1
Simplified89.1%
Final simplification96.9%
; Two-regime alternative split on t_0 = (2*PI)*u2 <= 0.05000000074505806:
;  - at or below: sqrt(-log1p(-u1)) times a cubic series in u2 for the sine;
;  - above: sin(t_0) * sqrt(-u1 * P(u1)), P a cubic in u1 (Horner fma form).
(FPCore (cosTheta_i u1 u2)
:precision binary32
(let* ((t_0 (* (* 2.0 PI) u2)))
(if (<= t_0 0.05000000074505806)
(*
(sqrt (- (log1p (- u1))))
(*
u2
(fma (* -1.3333333333333333 (* u2 u2)) (* PI (* PI PI)) (* 2.0 PI))))
(*
(sin t_0)
(sqrt
(*
(- u1)
(fma u1 (fma u1 (fma u1 -0.25 -0.3333333333333333) -0.5) -1.0)))))))
/*
 * Two-regime alternative, split on phase = (2*pi)*u2 <= 0.05000000074505806.
 *   At or below: sqrt(-log1p(-u1)) * u2 * fma(-4/3 * u2^2, pi^3, 2*pi).
 *   Above:       sin(phase) * sqrt(-u1 * P(u1)), P a cubic in Horner form.
 * cosTheta_i is not read.
 */
float code(float cosTheta_i, float u1, float u2) {
	const float pi = (float) M_PI;
	const float phase = (2.0f * pi) * u2;
	if (phase <= 0.05000000074505806f) {
		const float sin_series = u2 * fmaf(-1.3333333333333333f * (u2 * u2), pi * (pi * pi), 2.0f * pi);
		return sqrtf(-log1pf(-u1)) * sin_series;
	}
	const float cubic = fmaf(u1, fmaf(u1, fmaf(u1, -0.25f, -0.3333333333333333f), -0.5f), -1.0f);
	return sinf(phase) * sqrtf(-u1 * cubic);
}
# Two-regime alternative, split on t_0 = (2*pi)*u2 <= 0.05000000074505806.
#   At or below: sqrt(-log1p(-u1)) * u2 * fma(-4/3 * u2^2, pi^3, 2*pi).
#   Above:       sin(t_0) * sqrt(-u1 * P(u1)), P a cubic in Horner form.
# cosTheta_i is accepted but not used. Statements are placed on separate
# lines; the collapsed one-line form had no separators and did not parse.
function code(cosTheta_i, u1, u2)
    t_0 = Float32(Float32(Float32(2.0) * Float32(pi)) * u2)
    if t_0 <= Float32(0.05000000074505806)
        sin_series = Float32(u2 * fma(Float32(Float32(-1.3333333333333333) * Float32(u2 * u2)), Float32(Float32(pi) * Float32(Float32(pi) * Float32(pi))), Float32(Float32(2.0) * Float32(pi))))
        return Float32(sqrt(Float32(-log1p(Float32(-u1)))) * sin_series)
    else
        cubic = fma(u1, fma(u1, fma(u1, Float32(-0.25), Float32(-0.3333333333333333)), Float32(-0.5)), Float32(-1.0))
        return Float32(sin(t_0) * sqrt(Float32(Float32(-u1) * cubic)))
    end
end
\begin{array}{l}
\\
\begin{array}{l}
t_0 := \left(2 \cdot \pi\right) \cdot u2\\
\mathbf{if}\;t\_0 \leq 0.05000000074505806:\\
\;\;\;\;\sqrt{-\mathsf{log1p}\left(-u1\right)} \cdot \left(u2 \cdot \mathsf{fma}\left(-1.3333333333333333 \cdot \left(u2 \cdot u2\right), \pi \cdot \left(\pi \cdot \pi\right), 2 \cdot \pi\right)\right)\\
\mathbf{else}:\\
\;\;\;\;\sin t\_0 \cdot \sqrt{\left(-u1\right) \cdot \mathsf{fma}\left(u1, \mathsf{fma}\left(u1, \mathsf{fma}\left(u1, -0.25, -0.3333333333333333\right), -0.5\right), -1\right)}\\
\end{array}
\end{array}
if (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2) < 0.0500000007
Initial program 59.2%
sub-negN/A
accelerator-lowering-log1p.f32N/A
neg-lowering-neg.f3298.6
Applied egg-rr98.6%
Taylor expanded in u2 around 0
*-lowering-*.f32N/A
associate-*r*N/A
accelerator-lowering-fma.f32N/A
*-lowering-*.f32N/A
unpow2N/A
*-lowering-*.f32N/A
cube-multN/A
*-lowering-*.f32N/A
PI-lowering-PI.f32N/A
*-lowering-*.f32N/A
PI-lowering-PI.f32N/A
PI-lowering-PI.f32N/A
*-lowering-*.f32N/A
PI-lowering-PI.f3298.7
Simplified98.7%
if 0.0500000007 < (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2)
Initial program 49.9%
Taylor expanded in u1 around 0
*-lowering-*.f32N/A
sub-negN/A
metadata-evalN/A
accelerator-lowering-fma.f32N/A
sub-negN/A
metadata-evalN/A
accelerator-lowering-fma.f32N/A
sub-negN/A
*-commutativeN/A
metadata-evalN/A
accelerator-lowering-fma.f3294.8
Simplified94.8%
Final simplification98.0%
; Two-regime alternative split on (1 - u1) <= 0.9300000071525574:
;  - at or below: sqrt(-log(1 - u1)) * u2 * (PI * fma(-4/3 * u2^2, PI*PI, 2));
;  - above: sin((2*PI)*u2) * sqrt(-u1 * P(u1)), P a cubic in u1 (Horner fma form).
(FPCore (cosTheta_i u1 u2)
:precision binary32
(if (<= (- 1.0 u1) 0.9300000071525574)
(*
(sqrt (- (log (- 1.0 u1))))
(* u2 (* PI (fma (* -1.3333333333333333 (* u2 u2)) (* PI PI) 2.0))))
(*
(sin (* (* 2.0 PI) u2))
(sqrt
(*
(- u1)
(fma u1 (fma u1 (fma u1 -0.25 -0.3333333333333333) -0.5) -1.0))))))
/*
 * Two-regime alternative, split on (1 - u1) <= 0.9300000071525574.
 *   At or below: sqrt(-log(1 - u1)) * u2 * (pi * fma(-4/3 * u2^2, pi*pi, 2)).
 *   Above:       sin((2*pi)*u2) * sqrt(-u1 * P(u1)), P a cubic in Horner form.
 * cosTheta_i is not read.
 */
float code(float cosTheta_i, float u1, float u2) {
	const float pi = (float) M_PI;
	const float complement = 1.0f - u1;
	if (complement <= 0.9300000071525574f) {
		const float sin_series = u2 * (pi * fmaf(-1.3333333333333333f * (u2 * u2), pi * pi, 2.0f));
		return sqrtf(-logf(complement)) * sin_series;
	}
	const float cubic = fmaf(u1, fmaf(u1, fmaf(u1, -0.25f, -0.3333333333333333f), -0.5f), -1.0f);
	return sinf((2.0f * pi) * u2) * sqrtf(-u1 * cubic);
}
# Two-regime alternative, split on (1 - u1) <= 0.9300000071525574.
#   At or below: sqrt(-log(1 - u1)) * u2 * (pi * fma(-4/3 * u2^2, pi*pi, 2)).
#   Above:       sin((2*pi)*u2) * sqrt(-u1 * P(u1)), P a cubic in Horner form.
# cosTheta_i is accepted but not used. Statements are placed on separate
# lines; the collapsed one-line form had no separators and did not parse.
function code(cosTheta_i, u1, u2)
    if Float32(Float32(1.0) - u1) <= Float32(0.9300000071525574)
        sin_series = Float32(u2 * Float32(Float32(pi) * fma(Float32(Float32(-1.3333333333333333) * Float32(u2 * u2)), Float32(Float32(pi) * Float32(pi)), Float32(2.0))))
        return Float32(sqrt(Float32(-log(Float32(Float32(1.0) - u1)))) * sin_series)
    else
        cubic = fma(u1, fma(u1, fma(u1, Float32(-0.25), Float32(-0.3333333333333333)), Float32(-0.5)), Float32(-1.0))
        return Float32(sin(Float32(Float32(Float32(2.0) * Float32(pi)) * u2)) * sqrt(Float32(Float32(-u1) * cubic)))
    end
end
\begin{array}{l}
\\
\begin{array}{l}
\mathbf{if}\;1 - u1 \leq 0.9300000071525574:\\
\;\;\;\;\sqrt{-\log \left(1 - u1\right)} \cdot \left(u2 \cdot \left(\pi \cdot \mathsf{fma}\left(-1.3333333333333333 \cdot \left(u2 \cdot u2\right), \pi \cdot \pi, 2\right)\right)\right)\\
\mathbf{else}:\\
\;\;\;\;\sin \left(\left(2 \cdot \pi\right) \cdot u2\right) \cdot \sqrt{\left(-u1\right) \cdot \mathsf{fma}\left(u1, \mathsf{fma}\left(u1, \mathsf{fma}\left(u1, -0.25, -0.3333333333333333\right), -0.5\right), -1\right)}\\
\end{array}
\end{array}
if (-.f32 #s(literal 1 binary32) u1) < 0.930000007
Initial program 97.8%
Taylor expanded in u2 around 0
*-commutativeN/A
associate-*r*N/A
*-commutativeN/A
+-commutativeN/A
*-lowering-*.f32N/A
+-commutativeN/A
*-commutativeN/A
associate-*r*N/A
*-commutativeN/A
associate-*r*N/A
unpow3N/A
associate-*r*N/A
distribute-rgt-outN/A
Simplified95.9%
if 0.930000007 < (-.f32 #s(literal 1 binary32) u1)
Initial program 50.8%
Taylor expanded in u1 around 0
*-lowering-*.f32N/A
sub-negN/A
metadata-evalN/A
accelerator-lowering-fma.f32N/A
sub-negN/A
metadata-evalN/A
accelerator-lowering-fma.f32N/A
sub-negN/A
*-commutativeN/A
metadata-evalN/A
accelerator-lowering-fma.f3298.1
Simplified98.1%
Final simplification97.8%
; Two-regime alternative split on (1 - u1) <= 0.9800000190734863:
;  - at or below: sqrt(-log1p(-u1)) * (2 * (PI * u2));
;  - above: sin((2*PI)*u2) * sqrt(-u1 * P(u1)), P a quadratic in u1 (Horner fma form).
(FPCore (cosTheta_i u1 u2)
:precision binary32
(if (<= (- 1.0 u1) 0.9800000190734863)
(* (sqrt (- (log1p (- u1)))) (* 2.0 (* PI u2)))
(*
(sin (* (* 2.0 PI) u2))
(sqrt (* (- u1) (fma u1 (fma u1 -0.3333333333333333 -0.5) -1.0))))))
/*
 * Two-regime alternative, split on (1 - u1) <= 0.9800000190734863.
 *   At or below: sqrt(-log1p(-u1)) * (2 * (pi * u2)).
 *   Above:       sin((2*pi)*u2) * sqrt(-u1 * P(u1)), P a quadratic in Horner form.
 * cosTheta_i is not read.
 */
float code(float cosTheta_i, float u1, float u2) {
	const float pi = (float) M_PI;
	if ((1.0f - u1) <= 0.9800000190734863f) {
		return sqrtf(-log1pf(-u1)) * (2.0f * (pi * u2));
	}
	const float quad = fmaf(u1, fmaf(u1, -0.3333333333333333f, -0.5f), -1.0f);
	return sinf((2.0f * pi) * u2) * sqrtf(-u1 * quad);
}
# Two-regime alternative, split on (1 - u1) <= 0.9800000190734863.
#   At or below: sqrt(-log1p(-u1)) * (2 * (pi * u2)).
#   Above:       sin((2*pi)*u2) * sqrt(-u1 * P(u1)), P a quadratic in Horner form.
# cosTheta_i is accepted but not used. Statements are placed on separate
# lines; the collapsed one-line form had no separators and did not parse.
function code(cosTheta_i, u1, u2)
    if Float32(Float32(1.0) - u1) <= Float32(0.9800000190734863)
        return Float32(sqrt(Float32(-log1p(Float32(-u1)))) * Float32(Float32(2.0) * Float32(Float32(pi) * u2)))
    else
        quad = fma(u1, fma(u1, Float32(-0.3333333333333333), Float32(-0.5)), Float32(-1.0))
        return Float32(sin(Float32(Float32(Float32(2.0) * Float32(pi)) * u2)) * sqrt(Float32(Float32(-u1) * quad)))
    end
end
\begin{array}{l}
\\
\begin{array}{l}
\mathbf{if}\;1 - u1 \leq 0.9800000190734863:\\
\;\;\;\;\sqrt{-\mathsf{log1p}\left(-u1\right)} \cdot \left(2 \cdot \left(\pi \cdot u2\right)\right)\\
\mathbf{else}:\\
\;\;\;\;\sin \left(\left(2 \cdot \pi\right) \cdot u2\right) \cdot \sqrt{\left(-u1\right) \cdot \mathsf{fma}\left(u1, \mathsf{fma}\left(u1, -0.3333333333333333, -0.5\right), -1\right)}\\
\end{array}
\end{array}
if (-.f32 #s(literal 1 binary32) u1) < 0.980000019
Initial program 96.6%
sub-negN/A
accelerator-lowering-log1p.f32N/A
neg-lowering-neg.f3298.2
Applied egg-rr98.2%
Taylor expanded in u2 around 0
*-lowering-*.f32N/A
*-lowering-*.f32N/A
PI-lowering-PI.f3288.6
Simplified88.6%
if 0.980000019 < (-.f32 #s(literal 1 binary32) u1)
Initial program 47.5%
Taylor expanded in u1 around 0
*-lowering-*.f32N/A
sub-negN/A
metadata-evalN/A
accelerator-lowering-fma.f32N/A
sub-negN/A
*-commutativeN/A
metadata-evalN/A
accelerator-lowering-fma.f3298.2
Simplified98.2%
Final simplification96.3%
; Two-regime alternative split on (2*PI)*u2 <= 0.0011599999852478504:
;  - at or below: sqrt(-log1p(-u1)) * t_0, with t_0 = 2 * (PI * u2);
;  - above: sin(t_0) * (fma(u1, 0.25, 1) * sqrt(u1)).
; Note the test uses (2*PI)*u2 while t_0 associates as 2*(PI*u2).
(FPCore (cosTheta_i u1 u2)
:precision binary32
(let* ((t_0 (* 2.0 (* PI u2))))
(if (<= (* (* 2.0 PI) u2) 0.0011599999852478504)
(* (sqrt (- (log1p (- u1)))) t_0)
(* (sin t_0) (* (fma u1 0.25 1.0) (sqrt u1))))))
/*
 * Two-regime alternative, split on (2*pi)*u2 <= 0.0011599999852478504.
 *   At or below: sqrt(-log1p(-u1)) * phase, with phase = 2 * (pi * u2).
 *   Above:       sin(phase) * (fma(u1, 0.25, 1) * sqrt(u1)).
 * The threshold test deliberately keeps the (2*pi)*u2 association, which
 * differs from how `phase` is formed. cosTheta_i is not read.
 */
float code(float cosTheta_i, float u1, float u2) {
	const float pi = (float) M_PI;
	const float phase = 2.0f * (pi * u2);
	const float selector = (2.0f * pi) * u2;
	return (selector <= 0.0011599999852478504f)
		? sqrtf(-log1pf(-u1)) * phase
		: sinf(phase) * (fmaf(u1, 0.25f, 1.0f) * sqrtf(u1));
}
# Two-regime alternative, split on (2*pi)*u2 <= 0.0011599999852478504.
#   At or below: sqrt(-log1p(-u1)) * t_0, with t_0 = 2 * (pi * u2).
#   Above:       sin(t_0) * (fma(u1, 0.25, 1) * sqrt(u1)).
# cosTheta_i is accepted but not used. Statements are placed on separate
# lines; the collapsed one-line form had no separators and did not parse.
function code(cosTheta_i, u1, u2)
    t_0 = Float32(Float32(2.0) * Float32(Float32(pi) * u2))
    if Float32(Float32(Float32(2.0) * Float32(pi)) * u2) <= Float32(0.0011599999852478504)
        return Float32(sqrt(Float32(-log1p(Float32(-u1)))) * t_0)
    else
        return Float32(sin(t_0) * Float32(fma(u1, Float32(0.25), Float32(1.0)) * sqrt(u1)))
    end
end
\begin{array}{l}
\\
\begin{array}{l}
t_0 := 2 \cdot \left(\pi \cdot u2\right)\\
\mathbf{if}\;\left(2 \cdot \pi\right) \cdot u2 \leq 0.0011599999852478504:\\
\;\;\;\;\sqrt{-\mathsf{log1p}\left(-u1\right)} \cdot t\_0\\
\mathbf{else}:\\
\;\;\;\;\sin t\_0 \cdot \left(\mathsf{fma}\left(u1, 0.25, 1\right) \cdot \sqrt{u1}\right)\\
\end{array}
\end{array}
if (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2) < 0.00115999999
Initial program 60.9%
sub-negN/A
accelerator-lowering-log1p.f32N/A
neg-lowering-neg.f3298.6
Applied egg-rr98.6%
Taylor expanded in u2 around 0
*-lowering-*.f32N/A
*-lowering-*.f32N/A
PI-lowering-PI.f3298.5
Simplified98.5%
if 0.00115999999 < (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2)
Initial program 51.1%
Applied egg-rr48.1%
Taylor expanded in u1 around 0
associate-*r*N/A
distribute-rgt-outN/A
*-lowering-*.f32N/A
sin-lowering-sin.f32N/A
*-lowering-*.f32N/A
*-lowering-*.f32N/A
PI-lowering-PI.f32N/A
accelerator-lowering-fma.f32N/A
sqrt-lowering-sqrt.f32N/A
cube-multN/A
unpow2N/A
*-lowering-*.f32N/A
unpow2N/A
*-lowering-*.f32N/A
sqrt-lowering-sqrt.f3289.3
Simplified89.3%
*-commutativeN/A
flip-+N/A
associate-*l/N/A
/-lowering-/.f32N/A
Applied egg-rr89.0%
*-commutativeN/A
associate-/l*N/A
Applied egg-rr89.0%
Final simplification95.1%
; Two-regime alternative split on (1 - u1) <= 0.9800000190734863:
;  - at or below: sqrt(-log1p(-u1)) * (2 * (PI * u2));
;  - above: sin((2*PI)*u2) * sqrt(fma(u1*u1, fma(u1, 1/3, 0.5), u1)).
(FPCore (cosTheta_i u1 u2)
:precision binary32
(if (<= (- 1.0 u1) 0.9800000190734863)
(* (sqrt (- (log1p (- u1)))) (* 2.0 (* PI u2)))
(*
(sin (* (* 2.0 PI) u2))
(sqrt (fma (* u1 u1) (fma u1 0.3333333333333333 0.5) u1)))))
/*
 * Two-regime alternative, split on (1 - u1) <= 0.9800000190734863.
 *   At or below: sqrt(-log1p(-u1)) * (2 * (pi * u2)).
 *   Above:       sin((2*pi)*u2) * sqrt(fma(u1*u1, fma(u1, 1/3, 0.5), u1)).
 * cosTheta_i is not read.
 */
float code(float cosTheta_i, float u1, float u2) {
	const float pi = (float) M_PI;
	if ((1.0f - u1) <= 0.9800000190734863f) {
		return sqrtf(-log1pf(-u1)) * (2.0f * (pi * u2));
	}
	const float radicand = fmaf(u1 * u1, fmaf(u1, 0.3333333333333333f, 0.5f), u1);
	return sinf((2.0f * pi) * u2) * sqrtf(radicand);
}
# Two-regime alternative, split on (1 - u1) <= 0.9800000190734863.
#   At or below: sqrt(-log1p(-u1)) * (2 * (pi * u2)).
#   Above:       sin((2*pi)*u2) * sqrt(fma(u1*u1, fma(u1, 1/3, 0.5), u1)).
# cosTheta_i is accepted but not used. Statements are placed on separate
# lines; the collapsed one-line form had no separators and did not parse.
function code(cosTheta_i, u1, u2)
    if Float32(Float32(1.0) - u1) <= Float32(0.9800000190734863)
        return Float32(sqrt(Float32(-log1p(Float32(-u1)))) * Float32(Float32(2.0) * Float32(Float32(pi) * u2)))
    else
        radicand = fma(Float32(u1 * u1), fma(u1, Float32(0.3333333333333333), Float32(0.5)), u1)
        return Float32(sin(Float32(Float32(Float32(2.0) * Float32(pi)) * u2)) * sqrt(radicand))
    end
end
\begin{array}{l}
\\
\begin{array}{l}
\mathbf{if}\;1 - u1 \leq 0.9800000190734863:\\
\;\;\;\;\sqrt{-\mathsf{log1p}\left(-u1\right)} \cdot \left(2 \cdot \left(\pi \cdot u2\right)\right)\\
\mathbf{else}:\\
\;\;\;\;\sin \left(\left(2 \cdot \pi\right) \cdot u2\right) \cdot \sqrt{\mathsf{fma}\left(u1 \cdot u1, \mathsf{fma}\left(u1, 0.3333333333333333, 0.5\right), u1\right)}\\
\end{array}
\end{array}
if (-.f32 #s(literal 1 binary32) u1) < 0.980000019
Initial program 96.6%
sub-negN/A
accelerator-lowering-log1p.f32N/A
neg-lowering-neg.f3298.2
Applied egg-rr98.2%
Taylor expanded in u2 around 0
*-lowering-*.f32N/A
*-lowering-*.f32N/A
PI-lowering-PI.f3288.6
Simplified88.6%
if 0.980000019 < (-.f32 #s(literal 1 binary32) u1)
Initial program 47.5%
Taylor expanded in u1 around 0
+-commutativeN/A
distribute-lft-inN/A
associate-*r*N/A
unpow2N/A
*-rgt-identityN/A
accelerator-lowering-fma.f32N/A
unpow2N/A
*-lowering-*.f32N/A
+-commutativeN/A
*-commutativeN/A
accelerator-lowering-fma.f3298.1
Simplified98.1%
Final simplification96.2%
; Two-regime alternative split on t_0 = (2*PI)*u2 <= 0.029999999329447746:
;  - at or below: sqrt(-log1p(-u1)) * (2 * (PI * u2));
;  - above: sin(t_0) * sqrt(u1).
(FPCore (cosTheta_i u1 u2)
:precision binary32
(let* ((t_0 (* (* 2.0 PI) u2)))
(if (<= t_0 0.029999999329447746)
(* (sqrt (- (log1p (- u1)))) (* 2.0 (* PI u2)))
(* (sin t_0) (sqrt u1)))))
/*
 * Two-regime alternative, split on phase = (2*pi)*u2 <= 0.029999999329447746.
 *   At or below: sqrt(-log1p(-u1)) * (2 * (pi * u2)).
 *   Above:       sin(phase) * sqrt(u1).
 * cosTheta_i is not read.
 */
float code(float cosTheta_i, float u1, float u2) {
	const float pi = (float) M_PI;
	const float phase = (2.0f * pi) * u2;
	return (phase <= 0.029999999329447746f)
		? sqrtf(-log1pf(-u1)) * (2.0f * (pi * u2))
		: sinf(phase) * sqrtf(u1);
}
# Two-regime alternative, split on t_0 = (2*pi)*u2 <= 0.029999999329447746.
#   At or below: sqrt(-log1p(-u1)) * (2 * (pi * u2)).
#   Above:       sin(t_0) * sqrt(u1).
# cosTheta_i is accepted but not used. Statements are placed on separate
# lines; the collapsed one-line form had no separators and did not parse.
function code(cosTheta_i, u1, u2)
    t_0 = Float32(Float32(Float32(2.0) * Float32(pi)) * u2)
    if t_0 <= Float32(0.029999999329447746)
        return Float32(sqrt(Float32(-log1p(Float32(-u1)))) * Float32(Float32(2.0) * Float32(Float32(pi) * u2)))
    else
        return Float32(sin(t_0) * sqrt(u1))
    end
end
\begin{array}{l}
\\
\begin{array}{l}
t_0 := \left(2 \cdot \pi\right) \cdot u2\\
\mathbf{if}\;t\_0 \leq 0.029999999329447746:\\
\;\;\;\;\sqrt{-\mathsf{log1p}\left(-u1\right)} \cdot \left(2 \cdot \left(\pi \cdot u2\right)\right)\\
\mathbf{else}:\\
\;\;\;\;\sin t\_0 \cdot \sqrt{u1}\\
\end{array}
\end{array}
if (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2) < 0.0299999993
Initial program 59.6%
sub-negN/A
accelerator-lowering-log1p.f32N/A
neg-lowering-neg.f3298.6
Applied egg-rr98.6%
Taylor expanded in u2 around 0
*-lowering-*.f32N/A
*-lowering-*.f32N/A
PI-lowering-PI.f3295.5
Simplified95.5%
if 0.0299999993 < (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2)
Initial program 49.6%
Taylor expanded in u1 around 0
Simplified80.1%
Final simplification92.2%
; Two-regime alternative split on t_0 = (2*PI)*u2 <= 0.0949999988079071:
;  - at or below: (cubic series in u2 for the sine) * sqrt(-u1 * P(u1)), P a cubic in u1;
;  - above: sin(t_0) * sqrt(u1).
(FPCore (cosTheta_i u1 u2)
:precision binary32
(let* ((t_0 (* (* 2.0 PI) u2)))
(if (<= t_0 0.0949999988079071)
(*
(*
u2
(fma (* -1.3333333333333333 (* u2 u2)) (* PI (* PI PI)) (* 2.0 PI)))
(sqrt
(*
(- u1)
(fma u1 (fma u1 (fma u1 -0.25 -0.3333333333333333) -0.5) -1.0))))
(* (sin t_0) (sqrt u1)))))
/*
 * Two-regime alternative, split on phase = (2*pi)*u2 <= 0.0949999988079071.
 *   At or below: (u2 * fma(-4/3 * u2^2, pi^3, 2*pi)) * sqrt(-u1 * P(u1)),
 *                P a cubic in Horner form.
 *   Above:       sin(phase) * sqrt(u1).
 * cosTheta_i is not read.
 */
float code(float cosTheta_i, float u1, float u2) {
	const float pi = (float) M_PI;
	const float phase = (2.0f * pi) * u2;
	if (phase <= 0.0949999988079071f) {
		const float sin_series = u2 * fmaf(-1.3333333333333333f * (u2 * u2), pi * (pi * pi), 2.0f * pi);
		const float cubic = fmaf(u1, fmaf(u1, fmaf(u1, -0.25f, -0.3333333333333333f), -0.5f), -1.0f);
		return sin_series * sqrtf(-u1 * cubic);
	}
	return sinf(phase) * sqrtf(u1);
}
# Two-regime alternative, split on t_0 = (2*pi)*u2 <= 0.0949999988079071.
#   At or below: (u2 * fma(-4/3 * u2^2, pi^3, 2*pi)) * sqrt(-u1 * P(u1)),
#                P a cubic in Horner form.
#   Above:       sin(t_0) * sqrt(u1).
# cosTheta_i is accepted but not used. Statements are placed on separate
# lines; the collapsed one-line form had no separators and did not parse.
function code(cosTheta_i, u1, u2)
    t_0 = Float32(Float32(Float32(2.0) * Float32(pi)) * u2)
    if t_0 <= Float32(0.0949999988079071)
        sin_series = Float32(u2 * fma(Float32(Float32(-1.3333333333333333) * Float32(u2 * u2)), Float32(Float32(pi) * Float32(Float32(pi) * Float32(pi))), Float32(Float32(2.0) * Float32(pi))))
        cubic = fma(u1, fma(u1, fma(u1, Float32(-0.25), Float32(-0.3333333333333333)), Float32(-0.5)), Float32(-1.0))
        return Float32(sin_series * sqrt(Float32(Float32(-u1) * cubic)))
    else
        return Float32(sin(t_0) * sqrt(u1))
    end
end
\begin{array}{l}
\\
\begin{array}{l}
t_0 := \left(2 \cdot \pi\right) \cdot u2\\
\mathbf{if}\;t\_0 \leq 0.0949999988079071:\\
\;\;\;\;\left(u2 \cdot \mathsf{fma}\left(-1.3333333333333333 \cdot \left(u2 \cdot u2\right), \pi \cdot \left(\pi \cdot \pi\right), 2 \cdot \pi\right)\right) \cdot \sqrt{\left(-u1\right) \cdot \mathsf{fma}\left(u1, \mathsf{fma}\left(u1, \mathsf{fma}\left(u1, -0.25, -0.3333333333333333\right), -0.5\right), -1\right)}\\
\mathbf{else}:\\
\;\;\;\;\sin t\_0 \cdot \sqrt{u1}\\
\end{array}
\end{array}
if (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2) < 0.0949999988
Initial program 59.3%
Taylor expanded in u1 around 0
*-lowering-*.f32N/A
sub-negN/A
metadata-evalN/A
accelerator-lowering-fma.f32N/A
sub-negN/A
metadata-evalN/A
accelerator-lowering-fma.f32N/A
sub-negN/A
*-commutativeN/A
metadata-evalN/A
accelerator-lowering-fma.f3292.7
Simplified92.7%
Taylor expanded in u2 around 0
*-lowering-*.f32N/A
associate-*r*N/A
accelerator-lowering-fma.f32N/A
*-lowering-*.f32N/A
unpow2N/A
*-lowering-*.f32N/A
cube-multN/A
*-lowering-*.f32N/A
PI-lowering-PI.f32N/A
*-lowering-*.f32N/A
PI-lowering-PI.f32N/A
PI-lowering-PI.f32N/A
*-lowering-*.f32N/A
PI-lowering-PI.f3292.8
Simplified92.8%
if 0.0949999988 < (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2)
Initial program 48.2%
Taylor expanded in u1 around 0
Simplified79.5%
Final simplification90.5%
; Branch-free alternative: (cubic series in u2 for the sine) * sqrt(-u1 * P(u1)),
; with P a cubic in u1 in Horner fma form. No log or sin call remains.
(FPCore (cosTheta_i u1 u2) :precision binary32 (* (* u2 (fma (* -1.3333333333333333 (* u2 u2)) (* PI (* PI PI)) (* 2.0 PI))) (sqrt (* (- u1) (fma u1 (fma u1 (fma u1 -0.25 -0.3333333333333333) -0.5) -1.0)))))
/*
 * Branch-free alternative with both factors replaced by polynomials:
 *   (u2 * fma(-4/3 * u2^2, pi^3, 2*pi)) * sqrt(-u1 * P(u1)),
 * where P is a cubic in Horner form. cosTheta_i is not read.
 */
float code(float cosTheta_i, float u1, float u2) {
	const float pi = (float) M_PI;
	const float sin_series = u2 * fmaf(-1.3333333333333333f * (u2 * u2), pi * (pi * pi), 2.0f * pi);
	const float cubic = fmaf(u1, fmaf(u1, fmaf(u1, -0.25f, -0.3333333333333333f), -0.5f), -1.0f);
	return sin_series * sqrtf(-u1 * cubic);
}
# Branch-free alternative with both factors replaced by polynomials:
# (u2 * fma(-4/3 * u2^2, pi^3, 2*pi)) * sqrt(-u1 * P(u1)), P a cubic in
# Horner form. cosTheta_i is accepted but not used.
function code(cosTheta_i, u1, u2)
    pi_cubed = Float32(Float32(pi) * Float32(Float32(pi) * Float32(pi)))
    sin_series = Float32(u2 * fma(Float32(Float32(-1.3333333333333333) * Float32(u2 * u2)), pi_cubed, Float32(Float32(2.0) * Float32(pi))))
    cubic = fma(u1, fma(u1, fma(u1, Float32(-0.25), Float32(-0.3333333333333333)), Float32(-0.5)), Float32(-1.0))
    return Float32(sin_series * sqrt(Float32(Float32(-u1) * cubic)))
end
\begin{array}{l}
\\
\left(u2 \cdot \mathsf{fma}\left(-1.3333333333333333 \cdot \left(u2 \cdot u2\right), \pi \cdot \left(\pi \cdot \pi\right), 2 \cdot \pi\right)\right) \cdot \sqrt{\left(-u1\right) \cdot \mathsf{fma}\left(u1, \mathsf{fma}\left(u1, \mathsf{fma}\left(u1, -0.25, -0.3333333333333333\right), -0.5\right), -1\right)}
\end{array}
Initial program 57.4%
Taylor expanded in u1 around 0
*-lowering-*.f32N/A
sub-negN/A
metadata-evalN/A
accelerator-lowering-fma.f32N/A
sub-negN/A
metadata-evalN/A
accelerator-lowering-fma.f32N/A
sub-negN/A
*-commutativeN/A
metadata-evalN/A
accelerator-lowering-fma.f3293.0
Simplified93.0%
Taylor expanded in u2 around 0
*-lowering-*.f32N/A
associate-*r*N/A
accelerator-lowering-fma.f32N/A
*-lowering-*.f32N/A
unpow2N/A
*-lowering-*.f32N/A
cube-multN/A
*-lowering-*.f32N/A
PI-lowering-PI.f32N/A
*-lowering-*.f32N/A
PI-lowering-PI.f32N/A
PI-lowering-PI.f32N/A
*-lowering-*.f32N/A
PI-lowering-PI.f3285.7
Simplified85.7%
Final simplification85.7%
; Two-regime alternative split on (2*PI)*u2 <= 0.004000000189989805:
;  - at or below: sqrt(-u1 * P(u1)) * (2 * (PI * u2)), P a cubic in u1;
;  - above: (cubic series in u2 for the sine) * sqrt(u1).
(FPCore (cosTheta_i u1 u2)
:precision binary32
(if (<= (* (* 2.0 PI) u2) 0.004000000189989805)
(*
(sqrt
(* (- u1) (fma u1 (fma u1 (fma u1 -0.25 -0.3333333333333333) -0.5) -1.0)))
(* 2.0 (* PI u2)))
(*
(* u2 (fma (* -1.3333333333333333 (* u2 u2)) (* PI (* PI PI)) (* 2.0 PI)))
(sqrt u1))))
/*
 * Two-regime alternative, split on (2*pi)*u2 <= 0.004000000189989805.
 *   At or below: sqrt(-u1 * P(u1)) * (2 * (pi * u2)), P a cubic in Horner form.
 *   Above:       (u2 * fma(-4/3 * u2^2, pi^3, 2*pi)) * sqrt(u1).
 * cosTheta_i is not read.
 */
float code(float cosTheta_i, float u1, float u2) {
	const float pi = (float) M_PI;
	if (((2.0f * pi) * u2) <= 0.004000000189989805f) {
		const float cubic = fmaf(u1, fmaf(u1, fmaf(u1, -0.25f, -0.3333333333333333f), -0.5f), -1.0f);
		return sqrtf(-u1 * cubic) * (2.0f * (pi * u2));
	}
	const float sin_series = u2 * fmaf(-1.3333333333333333f * (u2 * u2), pi * (pi * pi), 2.0f * pi);
	return sin_series * sqrtf(u1);
}
# Two-regime alternative, split on (2*pi)*u2 <= 0.004000000189989805.
#   At or below: sqrt(-u1 * P(u1)) * (2 * (pi * u2)), P a cubic in Horner form.
#   Above:       (u2 * fma(-4/3 * u2^2, pi^3, 2*pi)) * sqrt(u1).
# cosTheta_i is accepted but not used. Statements are placed on separate
# lines; the collapsed one-line form had no separators and did not parse.
function code(cosTheta_i, u1, u2)
    if Float32(Float32(Float32(2.0) * Float32(pi)) * u2) <= Float32(0.004000000189989805)
        cubic = fma(u1, fma(u1, fma(u1, Float32(-0.25), Float32(-0.3333333333333333)), Float32(-0.5)), Float32(-1.0))
        return Float32(sqrt(Float32(Float32(-u1) * cubic)) * Float32(Float32(2.0) * Float32(Float32(pi) * u2)))
    else
        sin_series = Float32(u2 * fma(Float32(Float32(-1.3333333333333333) * Float32(u2 * u2)), Float32(Float32(pi) * Float32(Float32(pi) * Float32(pi))), Float32(Float32(2.0) * Float32(pi))))
        return Float32(sin_series * sqrt(u1))
    end
end
\begin{array}{l}
\\
\begin{array}{l}
\mathbf{if}\;\left(2 \cdot \pi\right) \cdot u2 \leq 0.004000000189989805:\\
\;\;\;\;\sqrt{\left(-u1\right) \cdot \mathsf{fma}\left(u1, \mathsf{fma}\left(u1, \mathsf{fma}\left(u1, -0.25, -0.3333333333333333\right), -0.5\right), -1\right)} \cdot \left(2 \cdot \left(\pi \cdot u2\right)\right)\\
\mathbf{else}:\\
\;\;\;\;\left(u2 \cdot \mathsf{fma}\left(-1.3333333333333333 \cdot \left(u2 \cdot u2\right), \pi \cdot \left(\pi \cdot \pi\right), 2 \cdot \pi\right)\right) \cdot \sqrt{u1}\\
\end{array}
\end{array}
if (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2) < 0.00400000019
Initial program 60.6%
Taylor expanded in u1 around 0
*-lowering-*.f32N/A
sub-negN/A
metadata-evalN/A
accelerator-lowering-fma.f32N/A
sub-negN/A
metadata-evalN/A
accelerator-lowering-fma.f32N/A
sub-negN/A
*-commutativeN/A
metadata-evalN/A
accelerator-lowering-fma.f3293.1
Simplified93.1%
Taylor expanded in u2 around 0
*-lowering-*.f32N/A
*-lowering-*.f32N/A
PI-lowering-PI.f3292.6
Simplified92.6%
if 0.00400000019 < (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2)
Initial program 50.6%
Taylor expanded in u1 around 0
Simplified79.3%
Taylor expanded in u2 around 0
*-lowering-*.f32N/A
associate-*r*N/A
accelerator-lowering-fma.f32N/A
*-lowering-*.f32N/A
unpow2N/A
*-lowering-*.f32N/A
cube-multN/A
*-lowering-*.f32N/A
PI-lowering-PI.f32N/A
*-lowering-*.f32N/A
PI-lowering-PI.f32N/A
PI-lowering-PI.f32N/A
*-lowering-*.f32N/A
PI-lowering-PI.f3262.8
Simplified62.8%
Final simplification83.3%
; Branch-free alternative: sqrt(-u1 * P(u1)) * (2 * (PI * u2)), with P a cubic
; in u1 (Horner fma form) and the sine replaced by its argument.
(FPCore (cosTheta_i u1 u2) :precision binary32 (* (sqrt (* (- u1) (fma u1 (fma u1 (fma u1 -0.25 -0.3333333333333333) -0.5) -1.0))) (* 2.0 (* PI u2))))
/*
 * Branch-free alternative: sqrt(-u1 * P(u1)) * (2 * (pi * u2)), with P a
 * cubic in Horner form and the sine replaced by its argument.
 * cosTheta_i is not read.
 */
float code(float cosTheta_i, float u1, float u2) {
	const float cubic = fmaf(u1, fmaf(u1, fmaf(u1, -0.25f, -0.3333333333333333f), -0.5f), -1.0f);
	const float radial = sqrtf(-u1 * cubic);
	const float phase = 2.0f * (((float) M_PI) * u2);
	return radial * phase;
}
# Branch-free alternative: sqrt(-u1 * P(u1)) * (2 * (pi * u2)), with P a cubic
# in Horner form and the sine replaced by its argument. cosTheta_i is unused.
function code(cosTheta_i, u1, u2)
    cubic = fma(u1, fma(u1, fma(u1, Float32(-0.25), Float32(-0.3333333333333333)), Float32(-0.5)), Float32(-1.0))
    radial = sqrt(Float32(Float32(-u1) * cubic))
    phase = Float32(Float32(2.0) * Float32(Float32(pi) * u2))
    return Float32(radial * phase)
end
\begin{array}{l}
\\
\sqrt{\left(-u1\right) \cdot \mathsf{fma}\left(u1, \mathsf{fma}\left(u1, \mathsf{fma}\left(u1, -0.25, -0.3333333333333333\right), -0.5\right), -1\right)} \cdot \left(2 \cdot \left(\pi \cdot u2\right)\right)
\end{array}
Initial program 57.4%
Taylor expanded in u1 around 0
*-lowering-*.f32N/A
sub-negN/A
metadata-evalN/A
accelerator-lowering-fma.f32N/A
sub-negN/A
metadata-evalN/A
accelerator-lowering-fma.f32N/A
sub-negN/A
*-commutativeN/A
metadata-evalN/A
accelerator-lowering-fma.f3293.0
Simplified93.0%
Taylor expanded in u2 around 0
*-lowering-*.f32N/A
*-lowering-*.f32N/A
PI-lowering-PI.f3279.4
Simplified79.4%
Final simplification79.4%
; Lowest-order alternative: PI * (2 * (u2 * sqrt(u1))). Both the log and the
; sine are gone; the report lists 66.4% accuracy for this form.
(FPCore (cosTheta_i u1 u2) :precision binary32 (* PI (* 2.0 (* u2 (sqrt u1)))))
/*
 * Lowest-order alternative: pi * (2 * (u2 * sqrt(u1))).
 * cosTheta_i is not read.
 */
float code(float cosTheta_i, float u1, float u2) {
	const float scaled = 2.0f * (u2 * sqrtf(u1));
	return ((float) M_PI) * scaled;
}
# Lowest-order alternative: pi * (2 * (u2 * sqrt(u1))), rounded to Float32 at
# each step. cosTheta_i is accepted but not used.
function code(cosTheta_i, u1, u2)
    scaled = Float32(Float32(2.0) * Float32(u2 * sqrt(u1)))
    return Float32(Float32(pi) * scaled)
end
function tmp = code(cosTheta_i, u1, u2)
	% Lowest-order alternative: pi * (2 * (u2 * sqrt(u1))) with single
	% constants. cosTheta_i is accepted but not used.
	scaled = (single(2.0) * (u2 * sqrt(u1)));
	tmp = single(pi) * scaled;
end
\begin{array}{l}
\\
\pi \cdot \left(2 \cdot \left(u2 \cdot \sqrt{u1}\right)\right)
\end{array}
Initial program 57.4%
Taylor expanded in u1 around 0
Simplified76.3%
Taylor expanded in u2 around 0
associate-*r*N/A
*-lowering-*.f32N/A
*-lowering-*.f32N/A
sqrt-lowering-sqrt.f32N/A
*-lowering-*.f32N/A
PI-lowering-PI.f3266.4
Simplified66.4%
associate-*r*N/A
*-lowering-*.f32N/A
associate-*l*N/A
*-lowering-*.f32N/A
*-lowering-*.f32N/A
sqrt-lowering-sqrt.f32N/A
PI-lowering-PI.f3266.4
Applied egg-rr66.4%
Final simplification66.4%
; Lowest-order alternative, associated as (PI * u2) * (2 * sqrt(u1)).
; The report lists 66.4% accuracy for this form.
(FPCore (cosTheta_i u1 u2) :precision binary32 (* (* PI u2) (* 2.0 (sqrt u1))))
/*
 * Lowest-order alternative, associated as (pi * u2) * (2 * sqrt(u1)).
 * cosTheta_i is not read.
 */
float code(float cosTheta_i, float u1, float u2) {
	const float angle_part = ((float) M_PI) * u2;
	const float radius_part = 2.0f * sqrtf(u1);
	return angle_part * radius_part;
}
# Lowest-order alternative, associated as (pi * u2) * (2 * sqrt(u1)), rounded
# to Float32 at each step. cosTheta_i is accepted but not used.
function code(cosTheta_i, u1, u2)
    angle_part = Float32(Float32(pi) * u2)
    radius_part = Float32(Float32(2.0) * sqrt(u1))
    return Float32(angle_part * radius_part)
end
function tmp = code(cosTheta_i, u1, u2)
	% Lowest-order alternative, associated as (pi * u2) * (2 * sqrt(u1))
	% with single constants. cosTheta_i is accepted but not used.
	angle_part = (single(pi) * u2);
	radius_part = (single(2.0) * sqrt(u1));
	tmp = angle_part * radius_part;
end
\begin{array}{l}
\\
\left(\pi \cdot u2\right) \cdot \left(2 \cdot \sqrt{u1}\right)
\end{array}
Initial program 57.4%
Taylor expanded in u1 around 0
Simplified76.3%
Taylor expanded in u2 around 0
associate-*r*N/A
*-lowering-*.f32N/A
*-lowering-*.f32N/A
sqrt-lowering-sqrt.f32N/A
*-lowering-*.f32N/A
PI-lowering-PI.f3266.4
Simplified66.4%
Final simplification66.4%
herbie shell --seed 2024204
; Reproduction input for the Herbie shell: the named benchmark with its
; precondition. The :pre clause bounds cosTheta_i to (0.9999, 1] and both
; u1 and u2 to [2.328306437e-10, 1]; the body itself does not use cosTheta_i.
(FPCore (cosTheta_i u1 u2)
:name "Beckmann Sample, near normal, slope_y"
:precision binary32
:pre (and (and (and (> cosTheta_i 0.9999) (<= cosTheta_i 1.0)) (and (<= 2.328306437e-10 u1) (<= u1 1.0))) (and (<= 2.328306437e-10 u2) (<= u2 1.0)))
(* (sqrt (- (log (- 1.0 u1)))) (sin (* (* 2.0 PI) u2))))