
; Initial program, evaluated in binary32:
;   -s * log(1 / (u * (hi - t_0) + t_0) - 1)
; where t_0 = 1 / (1 + exp(PI / s)) and hi = 1 / (1 + exp(-PI / s)).
(FPCore (u s)
:precision binary32
(let* ((t_0 (/ 1.0 (+ 1.0 (exp (/ PI s))))))
(*
(- s)
(log
(-
(/ 1.0 (+ (* u (- (/ 1.0 (+ 1.0 (exp (/ (- PI) s)))) t_0)) t_0))
1.0)))))
/*
 * Single-precision evaluation of
 *     -s * log(1 / (u * (hi - lo) + lo) - 1)
 * with lo = 1 / (1 + exp(pi / s)) and hi = 1 / (1 + exp(-pi / s)).
 */
float code(float u, float s) {
    const float pi_f = (float) M_PI;
    const float lo = 1.0f / (1.0f + expf(pi_f / s));
    const float hi = 1.0f / (1.0f + expf(-pi_f / s));
    /* Multiply and add are kept in one expression so any compiler
       contraction applies exactly as it would to the nested form. */
    const float mix = u * (hi - lo) + lo;
    return -s * logf(1.0f / mix - 1.0f);
}
# Evaluate -s * log(1 / (u * (hi - lo) + lo) - 1), rounding every intermediate
# to Float32, where lo = 1 / (1 + exp(pi / s)) and hi = 1 / (1 + exp(-pi / s)).
function code(u, s)
    f1 = Float32(1.0)
    lo = Float32(f1 / Float32(f1 + exp(Float32(Float32(pi) / s))))
    hi = Float32(f1 / Float32(f1 + exp(Float32(Float32(-Float32(pi)) / s))))
    mix = Float32(Float32(u * Float32(hi - lo)) + lo)
    return Float32(Float32(-s) * log(Float32(Float32(f1 / mix) - f1)))
end
function tmp = code(u, s)
    % CODE  Evaluate -s * log(1 / (u * (hi - lo) + lo) - 1) using single-precision
    % constants, where lo = 1 / (1 + exp(pi / s)) and hi = 1 / (1 + exp(-pi / s)).
    lo = single(1.0) / (single(1.0) + exp(single(pi) / s));
    hi = single(1.0) / (single(1.0) + exp(-single(pi) / s));
    tmp = -s * log(single(1.0) / (u * (hi - lo) + lo) - single(1.0));
end
\begin{array}{l}
\\
\begin{array}{l}
t_0 := \frac{1}{1 + e^{\frac{\pi}{s}}}\\
\left(-s\right) \cdot \log \left(\frac{1}{u \cdot \left(\frac{1}{1 + e^{\frac{-\pi}{s}}} - t\_0\right) + t\_0} - 1\right)
\end{array}
\end{array}
Sampling outcomes in binary32 precision:
Herbie found 10 alternatives:
| Alternative | Accuracy | Speedup |
|---|---|---|
; Alternative with the same expression tree as the initial program:
;   -s * log(1 / (u * (hi - t_0) + t_0) - 1)
; where t_0 = 1 / (1 + exp(PI / s)) and hi = 1 / (1 + exp(-PI / s)).
(FPCore (u s)
:precision binary32
(let* ((t_0 (/ 1.0 (+ 1.0 (exp (/ PI s))))))
(*
(- s)
(log
(-
(/ 1.0 (+ (* u (- (/ 1.0 (+ 1.0 (exp (/ (- PI) s)))) t_0)) t_0))
1.0)))))
/*
 * Returns -s * log(1 / (u * (upper - lower) + lower) - 1) in float arithmetic,
 * where lower = 1 / (1 + exp(pi / s)) and upper = 1 / (1 + exp(-pi / s)).
 */
float code(float u, float s) {
    const float pi_f = (float) M_PI;
    const float lower = 1.0f / (1.0f + expf(pi_f / s));
    const float upper = 1.0f / (1.0f + expf(-pi_f / s));
    /* The product and sum share one expression, matching the nested original. */
    const float blended = u * (upper - lower) + lower;
    const float odds = 1.0f / blended - 1.0f;
    return -s * logf(odds);
}
# Evaluate -s * log(1 / (u * (upper - lower) + lower) - 1) with each step rounded
# to Float32; lower = 1 / (1 + exp(pi / s)), upper = 1 / (1 + exp(-pi / s)).
function code(u, s)
    f1 = Float32(1.0)
    lower = Float32(f1 / Float32(f1 + exp(Float32(Float32(pi) / s))))
    upper = Float32(f1 / Float32(f1 + exp(Float32(Float32(-Float32(pi)) / s))))
    blended = Float32(Float32(u * Float32(upper - lower)) + lower)
    return Float32(Float32(-s) * log(Float32(Float32(f1 / blended) - f1)))
end
function tmp = code(u, s)
    % CODE  Evaluate -s * log(1 / (u * (upper - lower) + lower) - 1) using
    % single-precision constants, where lower = 1 / (1 + exp(pi / s)) and
    % upper = 1 / (1 + exp(-pi / s)).
    lower = single(1.0) / (single(1.0) + exp(single(pi) / s));
    upper = single(1.0) / (single(1.0) + exp(-single(pi) / s));
    tmp = -s * log(single(1.0) / (u * (upper - lower) + lower) - single(1.0));
end
\begin{array}{l}
\\
\begin{array}{l}
t_0 := \frac{1}{1 + e^{\frac{\pi}{s}}}\\
\left(-s\right) \cdot \log \left(\frac{1}{u \cdot \left(\frac{1}{1 + e^{\frac{-\pi}{s}}} - t\_0\right) + t\_0} - 1\right)
\end{array}
\end{array}
; Alternative using a fused multiply-add:
;   -s * log(-1 + 1 / fma(hi - lo, u, lo'))
; where hi = 1 / (1 + exp(0 - PI / s)), lo = 1 / (1 + exp(PI / s)) and
; lo' is the same quantity with the exponent written as PI * (1 / s).
(FPCore (u s)
:precision binary32
(*
(- s)
(log
(+
-1.0
(/
1.0
(fma
(+
(/ 1.0 (+ 1.0 (exp (- 0.0 (/ PI s)))))
(/ -1.0 (+ 1.0 (exp (/ PI s)))))
u
(/ 1.0 (+ 1.0 (exp (* PI (/ 1.0 s)))))))))))
/*
 * Returns -s * log(-1 + 1 / fma(hi + neg_lo, u, lo)) in float arithmetic, where
 *   hi     =  1 / (1 + exp(0 - pi / s)),
 *   neg_lo = -1 / (1 + exp(pi / s)),
 *   lo     =  1 / (1 + exp(pi * (1 / s))).
 */
float code(float u, float s) {
    const float pi_f = (float) M_PI;
    const float hi = 1.0f / (1.0f + expf(0.0f - pi_f / s));
    const float neg_lo = -1.0f / (1.0f + expf(pi_f / s));
    /* The intercept deliberately uses pi * (1 / s) rather than pi / s. */
    const float lo = 1.0f / (1.0f + expf(pi_f * (1.0f / s)));
    const float mix = fmaf(hi + neg_lo, u, lo);
    return -s * logf(-1.0f + 1.0f / mix);
}
# Evaluate -s * log(-1 + 1 / fma(hi + neg_lo, u, lo)) with Float32 rounding at
# every step; hi = 1 / (1 + exp(0 - pi / s)), neg_lo = -1 / (1 + exp(pi / s)),
# lo = 1 / (1 + exp(pi * (1 / s))).
function code(u, s)
    f1 = Float32(1.0)
    ratio = Float32(Float32(pi) / s)
    hi = Float32(f1 / Float32(f1 + exp(Float32(Float32(0.0) - ratio))))
    neg_lo = Float32(Float32(-1.0) / Float32(f1 + exp(ratio)))
    lo = Float32(f1 / Float32(f1 + exp(Float32(Float32(pi) * Float32(f1 / s)))))
    mix = fma(Float32(hi + neg_lo), u, lo)
    return Float32(Float32(-s) * log(Float32(Float32(-1.0) + Float32(f1 / mix))))
end
\begin{array}{l}
\\
\left(-s\right) \cdot \log \left(-1 + \frac{1}{\mathsf{fma}\left(\frac{1}{1 + e^{0 - \frac{\pi}{s}}} + \frac{-1}{1 + e^{\frac{\pi}{s}}}, u, \frac{1}{1 + e^{\pi \cdot \frac{1}{s}}}\right)}\right)
\end{array}
Initial program 99.1%
*-commutativeN/A
accelerator-lowering-fma.f32N/A
Applied egg-rr99.1%
clear-numN/A
associate-/r/N/A
*-lowering-*.f32N/A
/-lowering-/.f32N/A
PI-lowering-PI.f32 99.1%
Applied egg-rr99.1%
Final simplification99.1%
; Alternative using a fused multiply-add with a shared denominator:
;   -s * log(-1 + 1 / fma(hi - 1 / t_0, u, 1 / t_0))
; where t_0 = 1 + exp(PI / s) and hi = 1 / (1 + exp(0 - PI / s)).
(FPCore (u s)
:precision binary32
(let* ((t_0 (+ 1.0 (exp (/ PI s)))))
(*
(- s)
(log
(+
-1.0
(/
1.0
(fma
(+ (/ 1.0 (+ 1.0 (exp (- 0.0 (/ PI s))))) (/ -1.0 t_0))
u
(/ 1.0 t_0))))))))
/*
 * Returns -s * log(-1 + 1 / fma(hi - 1 / denom, u, 1 / denom)) in float
 * arithmetic, where denom = 1 + exp(pi / s) and hi = 1 / (1 + exp(0 - pi / s)).
 */
float code(float u, float s) {
    const float pi_f = (float) M_PI;
    const float denom = 1.0f + expf(pi_f / s);
    const float hi = 1.0f / (1.0f + expf(0.0f - pi_f / s));
    const float slope = hi + -1.0f / denom;
    const float mix = fmaf(slope, u, 1.0f / denom);
    return -s * logf(-1.0f + 1.0f / mix);
}
# Evaluate -s * log(-1 + 1 / fma(hi - 1 / denom, u, 1 / denom)) with Float32
# rounding at every step; denom = 1 + exp(pi / s), hi = 1 / (1 + exp(0 - pi / s)).
function code(u, s)
    f1 = Float32(1.0)
    ratio = Float32(Float32(pi) / s)
    denom = Float32(f1 + exp(ratio))
    hi = Float32(f1 / Float32(f1 + exp(Float32(Float32(0.0) - ratio))))
    slope = Float32(hi + Float32(Float32(-1.0) / denom))
    mix = fma(slope, u, Float32(f1 / denom))
    return Float32(Float32(-s) * log(Float32(Float32(-1.0) + Float32(f1 / mix))))
end
\begin{array}{l}
\\
\begin{array}{l}
t_0 := 1 + e^{\frac{\pi}{s}}\\
\left(-s\right) \cdot \log \left(-1 + \frac{1}{\mathsf{fma}\left(\frac{1}{1 + e^{0 - \frac{\pi}{s}}} + \frac{-1}{t\_0}, u, \frac{1}{t\_0}\right)}\right)
\end{array}
\end{array}
Initial program 99.1%
*-commutativeN/A
accelerator-lowering-fma.f32N/A
Applied egg-rr99.1%
Final simplification99.1%
; Alternative without the hi - lo difference:
;   -s * log(-1 + 1 / (1 / (1 + exp(PI / s)) + u / (1 + exp(0 - PI / s))))
(FPCore (u s)
:precision binary32
(*
(- s)
(log
(+
-1.0
(/
1.0
(+
(/ 1.0 (+ 1.0 (exp (/ PI s))))
(/ u (+ 1.0 (exp (- 0.0 (/ PI s)))))))))))
/*
 * Returns -s * log(-1 + 1 / (lo + scaled_hi)) in float arithmetic, where
 * lo = 1 / (1 + exp(pi / s)) and scaled_hi = u / (1 + exp(0 - pi / s)).
 */
float code(float u, float s) {
    const float pi_f = (float) M_PI;
    const float lo = 1.0f / (1.0f + expf(pi_f / s));
    const float scaled_hi = u / (1.0f + expf(0.0f - pi_f / s));
    const float total = lo + scaled_hi;
    return -s * logf(-1.0f + 1.0f / total);
}
# Evaluate -s * log(-1 + 1 / (lo + scaled_hi)) with Float32 rounding at every
# step; lo = 1 / (1 + exp(pi / s)), scaled_hi = u / (1 + exp(0 - pi / s)).
function code(u, s)
    f1 = Float32(1.0)
    ratio = Float32(Float32(pi) / s)
    lo = Float32(f1 / Float32(f1 + exp(ratio)))
    scaled_hi = Float32(u / Float32(f1 + exp(Float32(Float32(0.0) - ratio))))
    total = Float32(lo + scaled_hi)
    return Float32(Float32(-s) * log(Float32(Float32(-1.0) + Float32(f1 / total))))
end
function tmp = code(u, s)
    % CODE  Evaluate -s * log(-1 + 1 / (lo + scaled_hi)) using single-precision
    % constants, where lo = 1 / (1 + exp(pi / s)) and
    % scaled_hi = u / (1 + exp(0 - pi / s)).
    lo = single(1.0) / (single(1.0) + exp(single(pi) / s));
    scaled_hi = u / (single(1.0) + exp(single(0.0) - (single(pi) / s)));
    tmp = -s * log(single(-1.0) + (single(1.0) / (lo + scaled_hi)));
end
\begin{array}{l}
\\
\left(-s\right) \cdot \log \left(-1 + \frac{1}{\frac{1}{1 + e^{\frac{\pi}{s}}} + \frac{u}{1 + e^{0 - \frac{\pi}{s}}}}\right)
\end{array}
Initial program 99.1%
Taylor expanded in s around -inf
mul-1-negN/A
unsub-negN/A
--lowering--.f32N/A
/-lowering-/.f32N/A
+-commutativeN/A
accelerator-lowering-fma.f32N/A
/-lowering-/.f32N/A
unpow2N/A
*-lowering-*.f32N/A
PI-lowering-PI.f32N/A
PI-lowering-PI.f32N/A
mul-1-negN/A
neg-lowering-neg.f32N/A
PI-lowering-PI.f32 97.2%
Simplified97.2%
Taylor expanded in s around 0
/-lowering-/.f32N/A
+-lowering-+.f32N/A
exp-lowering-exp.f32N/A
mul-1-negN/A
neg-sub0N/A
--lowering--.f32N/A
/-lowering-/.f32N/A
PI-lowering-PI.f32 98.8%
Simplified98.8%
Final simplification98.8%
; Alternative that keeps one exp and replaces the other terms by polynomials in 1/s:
;   (-1 / (1 / s)) * log(-1 + 1 / (u * (hi + 1 / (-1 + (-1 + fma(-0.5, PI*PI/s, -PI) / s)))
;                                  + 1 / (1 + (1 + PI / s))))
; where hi = 1 / (1 + exp(0 - PI / s)).
(FPCore (u s)
:precision binary32
(*
(/ -1.0 (/ 1.0 s))
(log
(+
-1.0
(/
1.0
(+
(*
u
(+
(/ 1.0 (+ 1.0 (exp (- 0.0 (/ PI s)))))
(/ 1.0 (+ -1.0 (+ -1.0 (/ (fma -0.5 (/ (* PI PI) s) (- PI)) s))))))
(/ 1.0 (+ 1.0 (+ 1.0 (/ PI s))))))))))
/*
 * Returns (-1 / (1 / s)) * log(-1 + 1 / (u * (hi + neg_lo) + lo)) in float
 * arithmetic, where
 *   hi     = 1 / (1 + exp(0 - pi / s)),
 *   neg_lo = 1 / (-1 + (-1 + fma(-0.5, pi * pi / s, -pi) / s)),
 *   lo     = 1 / (1 + (1 + pi / s)).
 */
float code(float u, float s) {
    const float pi_f = (float) M_PI;
    const float hi = 1.0f / (1.0f + expf(0.0f - pi_f / s));
    const float series = fmaf(-0.5f, (pi_f * pi_f) / s, -pi_f) / s;
    const float neg_lo = 1.0f / (-1.0f + (-1.0f + series));
    const float lo = 1.0f / (1.0f + (1.0f + pi_f / s));
    /* Product and sum stay in one expression, as in the nested form. */
    const float mix = u * (hi + neg_lo) + lo;
    const float neg_s = -1.0f / (1.0f / s);
    return neg_s * logf(-1.0f + 1.0f / mix);
}
# Evaluate (-1 / (1 / s)) * log(-1 + 1 / (u * (hi + neg_lo) + lo)) with Float32
# rounding at every step, where hi = 1 / (1 + exp(0 - pi / s)),
# neg_lo = 1 / (-1 + (-1 + fma(-0.5, pi * pi / s, -pi) / s)) and
# lo = 1 / (1 + (1 + pi / s)).
function code(u, s)
    f1 = Float32(1.0)
    fm1 = Float32(-1.0)
    pi32 = Float32(pi)
    ratio = Float32(pi32 / s)
    hi = Float32(f1 / Float32(f1 + exp(Float32(Float32(0.0) - ratio))))
    series = Float32(fma(Float32(-0.5), Float32(Float32(pi32 * pi32) / s), Float32(-pi32)) / s)
    neg_lo = Float32(f1 / Float32(fm1 + Float32(fm1 + series)))
    lo = Float32(f1 / Float32(f1 + Float32(f1 + ratio)))
    mix = Float32(Float32(u * Float32(hi + neg_lo)) + lo)
    neg_s = Float32(fm1 / Float32(f1 / s))
    return Float32(neg_s * log(Float32(fm1 + Float32(f1 / mix))))
end
\begin{array}{l}
\\
\frac{-1}{\frac{1}{s}} \cdot \log \left(-1 + \frac{1}{u \cdot \left(\frac{1}{1 + e^{0 - \frac{\pi}{s}}} + \frac{1}{-1 + \left(-1 + \frac{\mathsf{fma}\left(-0.5, \frac{\pi \cdot \pi}{s}, -\pi\right)}{s}\right)}\right) + \frac{1}{1 + \left(1 + \frac{\pi}{s}\right)}}\right)
\end{array}
Initial program 99.1%
Taylor expanded in s around -inf
mul-1-negN/A
unsub-negN/A
--lowering--.f32N/A
/-lowering-/.f32N/A
+-commutativeN/A
accelerator-lowering-fma.f32N/A
/-lowering-/.f32N/A
unpow2N/A
*-lowering-*.f32N/A
PI-lowering-PI.f32N/A
PI-lowering-PI.f32N/A
mul-1-negN/A
neg-lowering-neg.f32N/A
PI-lowering-PI.f32 97.2%
Simplified97.2%
Applied egg-rr97.0%
Taylor expanded in s around inf
+-lowering-+.f32N/A
/-lowering-/.f32N/A
PI-lowering-PI.f32 84.1%
Simplified84.1%
Final simplification84.1%
; Alternative with no exp: -s * log(fma(4, PI * fma(-0.5, u, 0.25) / s, 1)).
(FPCore (u s) :precision binary32 (* (- s) (log (fma 4.0 (/ (* PI (fma -0.5 u 0.25)) s) 1.0))))
/*
 * Returns -s * log(fma(4, pi * fma(-0.5, u, 0.25) / s, 1)) in float arithmetic.
 */
float code(float u, float s) {
    const float pi_f = (float) M_PI;
    const float slope = fmaf(-0.5f, u, 0.25f);
    const float arg = (pi_f * slope) / s;
    return -s * logf(fmaf(4.0f, arg, 1.0f));
}
# Evaluate -s * log(fma(4, pi * fma(-0.5, u, 0.25) / s, 1)) with Float32
# rounding at every step.
function code(u, s)
    slope = fma(Float32(-0.5), u, Float32(0.25))
    arg = Float32(Float32(Float32(pi) * slope) / s)
    return Float32(Float32(-s) * log(fma(Float32(4.0), arg, Float32(1.0))))
end
\begin{array}{l}
\\
\left(-s\right) \cdot \log \left(\mathsf{fma}\left(4, \frac{\pi \cdot \mathsf{fma}\left(-0.5, u, 0.25\right)}{s}, 1\right)\right)
\end{array}
Initial program 99.1%
Taylor expanded in s around inf
/-lowering-/.f32N/A
distribute-rgt-out--N/A
metadata-evalN/A
*-commutativeN/A
*-lowering-*.f32N/A
*-commutativeN/A
*-lowering-*.f32N/A
PI-lowering-PI.f32 5.1%
Simplified5.1%
Taylor expanded in s around -inf
associate-*r/N/A
+-commutativeN/A
associate-*r/N/A
accelerator-lowering-fma.f32N/A
/-lowering-/.f32N/A
cancel-sign-sub-invN/A
metadata-evalN/A
associate-*r*N/A
distribute-rgt-outN/A
*-lowering-*.f32N/A
PI-lowering-PI.f32N/A
accelerator-lowering-fma.f32 25.2%
Simplified25.2%
; Alternative with no log or exp: fma(s * -4, PI * fma(-0.5, u, 0.25), 0) / s.
(FPCore (u s) :precision binary32 (/ (fma (* s -4.0) (* PI (fma -0.5 u 0.25)) 0.0) s))
/*
 * Returns fma(s * -4, pi * fma(-0.5, u, 0.25), 0) / s in float arithmetic.
 */
float code(float u, float s) {
    const float pi_f = (float) M_PI;
    const float scale = s * -4.0f;
    const float slope = pi_f * fmaf(-0.5f, u, 0.25f);
    /* The zero addend is retained so the fused operation is unchanged. */
    const float numerator = fmaf(scale, slope, 0.0f);
    return numerator / s;
}
# Evaluate fma(s * -4, pi * fma(-0.5, u, 0.25), 0) / s with Float32 rounding at
# every step.
function code(u, s)
    scale = Float32(s * Float32(-4.0))
    slope = Float32(Float32(pi) * fma(Float32(-0.5), u, Float32(0.25)))
    numerator = fma(scale, slope, Float32(0.0))
    return Float32(numerator / s)
end
\begin{array}{l}
\\
\frac{\mathsf{fma}\left(s \cdot -4, \pi \cdot \mathsf{fma}\left(-0.5, u, 0.25\right), 0\right)}{s}
\end{array}
Initial program 99.1%
Taylor expanded in s around -inf
Simplified10.2%
Taylor expanded in s around 0
/-lowering-/.f32N/A
Simplified12.9%
Final simplification12.9%
; Alternative that does not read s: fma(PI, -2, PI / u) * (-u).
(FPCore (u s) :precision binary32 (* (fma PI -2.0 (/ PI u)) (- u)))
/*
 * Returns fma(pi, -2, pi / u) * -u in float arithmetic. The parameter s is
 * accepted but never read.
 */
float code(float u, float s) {
    const float pi_f = (float) M_PI;
    const float bracket = fmaf(pi_f, -2.0f, pi_f / u);
    (void) s;
    return bracket * -u;
}
# Evaluate fma(pi, -2, pi / u) * -u with Float32 rounding at every step.
# The argument s is accepted but never read.
function code(u, s)
    pi32 = Float32(pi)
    bracket = fma(pi32, Float32(-2.0), Float32(pi32 / u))
    return Float32(bracket * Float32(-u))
end
\begin{array}{l}
\\
\mathsf{fma}\left(\pi, -2, \frac{\pi}{u}\right) \cdot \left(-u\right)
\end{array}
Initial program 99.1%
Taylor expanded in s around -inf
Simplified10.2%
Applied egg-rr10.1%
Taylor expanded in u around -inf
mul-1-negN/A
neg-sub0N/A
--lowering--.f32N/A
*-lowering-*.f32N/A
*-commutativeN/A
accelerator-lowering-fma.f32N/A
PI-lowering-PI.f32N/A
/-lowering-/.f32N/A
PI-lowering-PI.f32 12.9%
Simplified12.9%
Final simplification12.9%
; Alternative that does not read s: PI * fma(2, u, -1).
(FPCore (u s) :precision binary32 (* PI (fma 2.0 u -1.0)))
/*
 * Returns pi * fma(2, u, -1) in float arithmetic. The parameter s is accepted
 * but never read.
 */
float code(float u, float s) {
    const float pi_f = (float) M_PI;
    const float centered = fmaf(2.0f, u, -1.0f);
    (void) s;
    return pi_f * centered;
}
# Evaluate pi * fma(2, u, -1) with Float32 rounding at every step.
# The argument s is accepted but never read.
function code(u, s)
    centered = fma(Float32(2.0), u, Float32(-1.0))
    return Float32(Float32(pi) * centered)
end
\begin{array}{l}
\\
\pi \cdot \mathsf{fma}\left(2, u, -1\right)
\end{array}
Initial program 99.1%
Taylor expanded in s around -inf
Simplified10.2%
Applied egg-rr10.1%
Taylor expanded in u around 0
+-commutativeN/A
associate-*r*N/A
distribute-rgt-outN/A
*-lowering-*.f32N/A
PI-lowering-PI.f32N/A
accelerator-lowering-fma.f32 12.9%
Simplified12.9%
; Constant alternative: -PI, independent of u and s.
(FPCore (u s) :precision binary32 (- PI))
/*
 * Constant alternative: returns -pi rounded to float. Neither parameter is read.
 */
float code(float u, float s) {
    const float pi_f = (float) M_PI;
    (void) u;
    (void) s;
    return -pi_f;
}
# Constant alternative: returns -pi as a Float32. Neither argument is read.
function code(u, s)
    pi32 = Float32(pi)
    return Float32(-pi32)
end
function tmp = code(u, s)
    % CODE  Constant alternative: returns -pi in single precision.
    % Neither u nor s is read.
    tmp = -single(pi);
end
\begin{array}{l}
\\
-\pi
\end{array}
Initial program 99.1%
Taylor expanded in u around 0
mul-1-negN/A
neg-lowering-neg.f32N/A
PI-lowering-PI.f32 12.8%
Simplified12.8%
; Constant alternative: 0, independent of u and s.
(FPCore (u s) :precision binary32 0.0)
/*
 * Constant alternative: returns 0.0f. Neither parameter is read.
 */
float code(float u, float s) {
    const float zero = 0.0f;
    (void) u;
    (void) s;
    return zero;
}
! Constant alternative: returns single-precision zero. Neither argument is read.
function code(u, s) result(res)
    real(4), intent (in) :: u
    real(4), intent (in) :: s
    real(4) :: res
    res = 0.0e0
end function code
# Constant alternative: returns zero as a Float32. Neither argument is read.
function code(u, s)
    zero32 = Float32(0.0)
    return zero32
end
function tmp = code(u, s)
    % CODE  Constant alternative: returns zero in single precision.
    % Neither u nor s is read.
    tmp = single(0.0);
end
\begin{array}{l}
\\
0
\end{array}
Initial program 99.1%
Taylor expanded in s around -inf
Simplified10.2%
Taylor expanded in s around 0
associate-*r/N/A
distribute-rgt-outN/A
metadata-evalN/A
mul0-rgtN/A
metadata-evalN/A
/-lowering-/.f32 10.1%
Simplified10.1%
div0 10.1%
Applied egg-rr10.1%
herbie shell --seed 2024193
; Input specification for the benchmark named below, evaluated in binary32.
; Precondition: 2.328306437e-10 <= u <= 1 and 0 <= s <= 1.0651631.
; Body: -s * log(1 / (u * (hi - lo) + lo) - 1), with lo = 1 / (1 + exp(PI / s))
; (written out twice rather than let-bound) and hi = 1 / (1 + exp(-PI / s)).
(FPCore (u s)
:name "Sample trimmed logistic on [-pi, pi]"
:precision binary32
:pre (and (and (<= 2.328306437e-10 u) (<= u 1.0)) (and (<= 0.0 s) (<= s 1.0651631)))
(* (- s) (log (- (/ 1.0 (+ (* u (- (/ 1.0 (+ 1.0 (exp (/ (- PI) s)))) (/ 1.0 (+ 1.0 (exp (/ PI s)))))) (/ 1.0 (+ 1.0 (exp (/ PI s)))))) 1.0))))