
; Input expression, evaluated in binary32:
;   t_0 / ((PI * log(alpha*alpha)) * (1 + (t_0 * cosTheta) * cosTheta))
; where t_0 = alpha*alpha - 1 is bound once and used both as the numerator
; and inside the cosTheta-dependent factor of the denominator.
(FPCore (cosTheta alpha)
:precision binary32
(let* ((t_0 (- (* alpha alpha) 1.0)))
(/
t_0
(* (* PI (log (* alpha alpha))) (+ 1.0 (* (* t_0 cosTheta) cosTheta))))))
/*
 * Evaluates, in float arithmetic,
 *   t / ((pi * log(alpha^2)) * (1 + (t * cosTheta) * cosTheta)),  t = alpha^2 - 1.
 * The operation order matches the FPCore expression this was generated from.
 */
float code(float cosTheta, float alpha) {
    const float numer = (alpha * alpha) - 1.0f;
    const float logTerm = ((float) M_PI) * logf(alpha * alpha);
    const float angular = 1.0f + ((numer * cosTheta) * cosTheta);
    return numer / (logTerm * angular);
}
# Evaluates t_0 / ((pi * log(alpha^2)) * (1 + (t_0 * cosTheta) * cosTheta)) with
# t_0 = alpha^2 - 1, rounding every intermediate to Float32.
# Each statement sits on its own line: Julia does not accept several
# statements on one line without a `;` or newline between them.
function code(cosTheta, alpha)
    t_0 = Float32(Float32(alpha * alpha) - Float32(1.0))
    return Float32(t_0 / Float32(Float32(Float32(pi) * log(Float32(alpha * alpha))) * Float32(Float32(1.0) + Float32(Float32(t_0 * cosTheta) * cosTheta))))
end
function tmp = code(cosTheta, alpha)
    % Evaluates t / ((pi * log(alpha^2)) * (1 + (t * cosTheta) * cosTheta))
    % with t = alpha^2 - 1; constants are created with single().
    numer = (alpha * alpha) - single(1.0);
    logTerm = single(pi) * log((alpha * alpha));
    angular = single(1.0) + ((numer * cosTheta) * cosTheta);
    tmp = numer / (logTerm * angular);
end
\begin{array}{l}
\\
\begin{array}{l}
t_0 := \alpha \cdot \alpha - 1\\
\frac{t\_0}{\left(\pi \cdot \log \left(\alpha \cdot \alpha\right)\right) \cdot \left(1 + \left(t\_0 \cdot cosTheta\right) \cdot cosTheta\right)}
\end{array}
\end{array}
Sampling outcomes in binary32 precision:
Herbie found 5 alternatives:
| Alternative | Accuracy | Speedup |
|---|---|---|
; Alternative with the same expression tree as the input program:
;   t_0 / ((PI * log(alpha*alpha)) * (1 + (t_0 * cosTheta) * cosTheta)),
; t_0 = alpha*alpha - 1, all in binary32.
(FPCore (cosTheta alpha)
:precision binary32
(let* ((t_0 (- (* alpha alpha) 1.0)))
(/
t_0
(* (* PI (log (* alpha alpha))) (+ 1.0 (* (* t_0 cosTheta) cosTheta))))))
/*
 * Float evaluation of (alpha^2 - 1) divided by
 * (pi * log(alpha^2)) * (1 + ((alpha^2 - 1) * cosTheta) * cosTheta).
 * The denominator factors are built first; the arithmetic order is unchanged.
 */
float code(float cosTheta, float alpha) {
    const float shifted = (alpha * alpha) - 1.0f;
    const float scaledLog = ((float) M_PI) * logf(alpha * alpha);
    const float cosFactor = 1.0f + ((shifted * cosTheta) * cosTheta);
    const float denominator = scaledLog * cosFactor;
    return shifted / denominator;
}
# Float32 evaluation of t_0 / ((pi * log(alpha^2)) * (1 + (t_0 * cosTheta) * cosTheta)),
# where t_0 = alpha^2 - 1.
# The assignment and the return are placed on separate lines so the
# definition parses; the expressions themselves are unchanged.
function code(cosTheta, alpha)
    t_0 = Float32(Float32(alpha * alpha) - Float32(1.0))
    return Float32(t_0 / Float32(Float32(Float32(pi) * log(Float32(alpha * alpha))) * Float32(Float32(1.0) + Float32(Float32(t_0 * cosTheta) * cosTheta))))
end
function tmp = code(cosTheta, alpha)
    % Computes (alpha^2 - 1) / ((pi * log(alpha^2)) * (1 + ((alpha^2 - 1) * cosTheta) * cosTheta)),
    % using single() for the literal constants.
    shifted = (alpha * alpha) - single(1.0);
    scaledLog = single(pi) * log((alpha * alpha));
    cosFactor = single(1.0) + ((shifted * cosTheta) * cosTheta);
    denominator = scaledLog * cosFactor;
    tmp = shifted / denominator;
end
\begin{array}{l}
\\
\begin{array}{l}
t_0 := \alpha \cdot \alpha - 1\\
\frac{t\_0}{\left(\pi \cdot \log \left(\alpha \cdot \alpha\right)\right) \cdot \left(1 + \left(t\_0 \cdot cosTheta\right) \cdot cosTheta\right)}
\end{array}
\end{array}
; Alternative that moves PI and the cosTheta factor inside the logarithm:
;   t_0 = alpha*alpha - 1,  t_1 = (alpha*alpha)^PI,
;   result = t_0 / log(t_1^((cosTheta*cosTheta) * t_0) * t_1^1).
(FPCore (cosTheta alpha) :precision binary32 (let* ((t_0 (- (* alpha alpha) 1.0)) (t_1 (pow (* alpha alpha) PI))) (/ t_0 (log (* (pow t_1 (* (* cosTheta cosTheta) t_0)) (pow t_1 1.0))))))
/*
 * Float evaluation of t / log(b^((cosTheta^2) * t) * b^1), where
 * t = alpha^2 - 1 and b = (alpha^2)^pi.
 * The powf(b, 1.0f) call is kept exactly as generated.
 */
float code(float cosTheta, float alpha) {
    const float numer = (alpha * alpha) - 1.0f;
    const float base = powf(alpha * alpha, (float) M_PI);
    const float exponent = (cosTheta * cosTheta) * numer;
    const float product = powf(base, exponent) * powf(base, 1.0f);
    return numer / logf(product);
}
# Float32 evaluation of t_0 / log(t_1^((cosTheta^2) * t_0) * t_1^1), where
# t_0 = alpha^2 - 1 and t_1 = (alpha^2)^pi.
# Statements are split onto separate lines so the definition parses; the
# expressions (including the `^ Float32(1.0)` factor) are unchanged.
function code(cosTheta, alpha)
    t_0 = Float32(Float32(alpha * alpha) - Float32(1.0))
    t_1 = Float32(alpha * alpha) ^ Float32(pi)
    return Float32(t_0 / log(Float32((t_1 ^ Float32(Float32(cosTheta * cosTheta) * t_0)) * (t_1 ^ Float32(1.0)))))
end
function tmp = code(cosTheta, alpha)
    % Computes t / log(b^((cosTheta^2) * t) * b^1) with t = alpha^2 - 1 and
    % b = (alpha^2)^pi; literal constants are created with single().
    numer = (alpha * alpha) - single(1.0);
    base = (alpha * alpha) ^ single(pi);
    exponent = (cosTheta * cosTheta) * numer;
    product = (base ^ exponent) * (base ^ single(1.0));
    tmp = numer / log(product);
end
\begin{array}{l}
\\
\begin{array}{l}
t_0 := \alpha \cdot \alpha - 1\\
t_1 := {\left(\alpha \cdot \alpha\right)}^{\pi}\\
\frac{t\_0}{\log \left({t\_1}^{\left(\left(cosTheta \cdot cosTheta\right) \cdot t\_0\right)} \cdot {t\_1}^{1}\right)}
\end{array}
\end{array}
Initial program 98.6%
lift-*.f32 N/A
lift-PI.f32 N/A
lift-*.f32 N/A
lift-log.f32 N/A
lift-*.f32 N/A
lift-+.f32 N/A
lift-*.f32 N/A
lift-*.f32 N/A
lift--.f32 N/A
lift-*.f32 N/A
*-commutative N/A
lift-*.f32 N/A
log-pow-rev N/A
Applied rewrites 98.7%
lift-pow.f32 N/A
lift-*.f32 N/A
lift-PI.f32 N/A
lift-pow.f32 N/A
lift-*.f32 N/A
lift-fma.f32 N/A
lift--.f32 N/A
lift-*.f32 N/A
unpow-prod-up N/A
lower-*.f32 N/A
Applied rewrites 98.7%
; Alternative that splits the quotient into a difference of two fractions:
;   t_0 = fma(cosTheta*cosTheta, alpha*alpha - 1, 1) * log((alpha*alpha)^PI)
;   t_1 = t_0^2
;   result = ((alpha*alpha) * t_0) / t_1 - t_0 / t_1
(FPCore (cosTheta alpha)
:precision binary32
(let* ((t_0
(*
(fma (* cosTheta cosTheta) (- (* alpha alpha) 1.0) 1.0)
(log (pow (* alpha alpha) PI))))
(t_1 (pow t_0 2.0)))
(- (/ (* (* alpha alpha) t_0) t_1) (/ t_0 t_1))))
/*
 * Float evaluation of (alpha^2 * d) / d^2 - d / d^2, where
 * d = fma(cosTheta^2, alpha^2 - 1, 1) * log((alpha^2)^pi).
 */
float code(float cosTheta, float alpha) {
    const float angular = fmaf(cosTheta * cosTheta, (alpha * alpha) - 1.0f, 1.0f);
    const float logTerm = logf(powf(alpha * alpha, (float) M_PI));
    const float denom = angular * logTerm;
    const float denomSq = powf(denom, 2.0f);
    const float scaledPart = ((alpha * alpha) * denom) / denomSq;
    const float unitPart = denom / denomSq;
    return scaledPart - unitPart;
}
# Float32 evaluation of (alpha^2 * t_0) / t_1 - t_0 / t_1, where
# t_0 = fma(cosTheta^2, alpha^2 - 1, 1) * log((alpha^2)^pi) and t_1 = t_0^2.
# Each statement is on its own line so the definition parses; the
# expressions are unchanged.
function code(cosTheta, alpha)
    t_0 = Float32(fma(Float32(cosTheta * cosTheta), Float32(Float32(alpha * alpha) - Float32(1.0)), Float32(1.0)) * log((Float32(alpha * alpha) ^ Float32(pi))))
    t_1 = t_0 ^ Float32(2.0)
    return Float32(Float32(Float32(Float32(alpha * alpha) * t_0) / t_1) - Float32(t_0 / t_1))
end
\begin{array}{l}
\\
\begin{array}{l}
t_0 := \mathsf{fma}\left(cosTheta \cdot cosTheta, \alpha \cdot \alpha - 1, 1\right) \cdot \log \left({\left(\alpha \cdot \alpha\right)}^{\pi}\right)\\
t_1 := {t\_0}^{2}\\
\frac{\left(\alpha \cdot \alpha\right) \cdot t\_0}{t\_1} - \frac{t\_0}{t\_1}
\end{array}
\end{array}
Initial program 98.6%
Applied rewrites 98.6%
Applied rewrites 98.4%
Final simplification 98.4%
; Alternative that factors the numerator as (alpha + 1) * (alpha - 1):
;   ((alpha + 1) / log((alpha^PI)^2)) * ((alpha - 1) / fma(cosTheta*cosTheta, alpha*alpha - 1, 1))
(FPCore (cosTheta alpha) :precision binary32 (* (/ (+ alpha 1.0) (log (pow (pow alpha PI) 2.0))) (/ (- alpha 1.0) (fma (* cosTheta cosTheta) (- (* alpha alpha) 1.0) 1.0))))
/*
 * Float evaluation of the product of two quotients:
 *   (alpha + 1) / log((alpha^pi)^2)   and
 *   (alpha - 1) / fma(cosTheta^2, alpha^2 - 1, 1).
 */
float code(float cosTheta, float alpha) {
    const float logTerm = logf(powf(powf(alpha, (float) M_PI), 2.0f));
    const float left = (alpha + 1.0f) / logTerm;
    const float angular = fmaf(cosTheta * cosTheta, (alpha * alpha) - 1.0f, 1.0f);
    const float right = (alpha - 1.0f) / angular;
    return left * right;
}
# Float32 evaluation of
#   ((alpha + 1) / log((alpha^pi)^2)) * ((alpha - 1) / fma(cosTheta^2, alpha^2 - 1, 1)),
# written with one named intermediate per quotient.
function code(cosTheta, alpha)
    left = Float32(Float32(alpha + Float32(1.0)) / log(((alpha ^ Float32(pi)) ^ Float32(2.0))))
    right = Float32(Float32(alpha - Float32(1.0)) / fma(Float32(cosTheta * cosTheta), Float32(Float32(alpha * alpha) - Float32(1.0)), Float32(1.0)))
    return Float32(left * right)
end
\begin{array}{l}
\\
\frac{\alpha + 1}{\log \left({\left({\alpha}^{\pi}\right)}^{2}\right)} \cdot \frac{\alpha - 1}{\mathsf{fma}\left(cosTheta \cdot cosTheta, \alpha \cdot \alpha - 1, 1\right)}
\end{array}
Initial program 98.6%
Applied rewrites 98.3%
; Alternative with numerator alpha^4 - 1 and a compensating (1 + alpha*alpha)
; factor in the denominator:
;   (alpha^4 - 1) /
;     (log((alpha*alpha)^PI) * ((1 + alpha*alpha) * ((1 + (alpha*cosTheta)^2) - cosTheta*cosTheta)))
(FPCore (cosTheta alpha)
:precision binary32
(/
(- (pow alpha 4.0) 1.0)
(*
(log (pow (* alpha alpha) PI))
(*
(+ 1.0 (* alpha alpha))
(- (+ 1.0 (pow (* alpha cosTheta) 2.0)) (* cosTheta cosTheta))))))
/*
 * Float evaluation of
 *   (alpha^4 - 1) / (log((alpha^2)^pi) * ((1 + alpha^2) * ((1 + (alpha*cosTheta)^2) - cosTheta^2))).
 */
float code(float cosTheta, float alpha) {
    const float numer = powf(alpha, 4.0f) - 1.0f;
    const float logTerm = logf(powf(alpha * alpha, (float) M_PI));
    const float onePlusSq = 1.0f + (alpha * alpha);
    const float angular = (1.0f + powf(alpha * cosTheta, 2.0f)) - (cosTheta * cosTheta);
    return numer / (logTerm * (onePlusSq * angular));
}
# Float32 evaluation of
#   (alpha^4 - 1) / (log((alpha^2)^pi) * ((1 + alpha^2) * ((1 + (alpha*cosTheta)^2) - cosTheta^2))),
# with each factor bound to a local name before the final division.
function code(cosTheta, alpha)
    numer = Float32((alpha ^ Float32(4.0)) - Float32(1.0))
    log_term = log((Float32(alpha * alpha) ^ Float32(pi)))
    one_plus_sq = Float32(Float32(1.0) + Float32(alpha * alpha))
    angular = Float32(Float32(Float32(1.0) + (Float32(alpha * cosTheta) ^ Float32(2.0))) - Float32(cosTheta * cosTheta))
    return Float32(numer / Float32(log_term * Float32(one_plus_sq * angular)))
end
function tmp = code(cosTheta, alpha)
    % Computes (alpha^4 - 1) / (log((alpha^2)^pi) * ((1 + alpha^2) * ((1 + (alpha*cosTheta)^2) - cosTheta^2)));
    % literal constants are created with single().
    numer = (alpha ^ single(4.0)) - single(1.0);
    logTerm = log(((alpha * alpha) ^ single(pi)));
    onePlusSq = single(1.0) + (alpha * alpha);
    angular = (single(1.0) + ((alpha * cosTheta) ^ single(2.0))) - (cosTheta * cosTheta);
    tmp = numer / (logTerm * (onePlusSq * angular));
end
\begin{array}{l}
\\
\frac{{\alpha}^{4} - 1}{\log \left({\left(\alpha \cdot \alpha\right)}^{\pi}\right) \cdot \left(\left(1 + \alpha \cdot \alpha\right) \cdot \left(\left(1 + {\left(\alpha \cdot cosTheta\right)}^{2}\right) - cosTheta \cdot cosTheta\right)\right)}
\end{array}
Initial program 98.6%
lift-*.f32 N/A
lift-PI.f32 N/A
lift-*.f32 N/A
lift-log.f32 N/A
lift-*.f32 N/A
lift-+.f32 N/A
lift-*.f32 N/A
lift-*.f32 N/A
lift--.f32 N/A
lift-*.f32 N/A
*-commutative N/A
lift-*.f32 N/A
log-pow-rev N/A
Applied rewrites 98.7%
Taylor expanded in alpha around inf
pow2 N/A
flip-- N/A
metadata-eval N/A
metadata-eval N/A
sqr-pow N/A
metadata-eval N/A
pow2 N/A
lower-*.f32 N/A
pow2 N/A
lift-*.f32 N/A
lower--.f32 N/A
pow-flip N/A
metadata-eval N/A
lower-pow.f32 98.3
Applied rewrites 98.3%
Taylor expanded in cosTheta around inf
lower-/.f32 N/A
lift-pow.f32 N/A
lift--.f32 N/A
lower-*.f32 N/A
lower-log.f32 N/A
pow2 N/A
lift-*.f32 N/A
lift-pow.f32 N/A
lift-PI.f32 N/A
lower-*.f32 N/A
Applied rewrites 98.3%
; Alternative built from four let*-bound terms:
;   t_0 = log(alpha) * PI
;   t_1 = fma(cosTheta*cosTheta, -1, 1)            ; i.e. 1 - cosTheta^2
;   t_2 = 0.5 / (t_0 * t_1)
;   t_3 = t_2 - (-0.5 * cosTheta*cosTheta) / (t_0 * t_1^2)
;   result = fma(((cosTheta*alpha)^2 * t_3) / t_1, -1, t_3) * (alpha*alpha) - t_2
(FPCore (cosTheta alpha)
:precision binary32
(let* ((t_0 (* (log alpha) PI))
(t_1 (fma (* cosTheta cosTheta) -1.0 1.0))
(t_2 (/ 0.5 (* t_0 t_1)))
(t_3 (- t_2 (/ (* -0.5 (* cosTheta cosTheta)) (* t_0 (pow t_1 2.0))))))
(-
(*
(fma (/ (* (pow (* cosTheta alpha) 2.0) t_3) t_1) -1.0 t_3)
(* alpha alpha))
t_2)))
/*
 * Float evaluation of
 *   fma((cosTheta*alpha)^2 * s / q, -1, s) * alpha^2 - b
 * where p = log(alpha) * pi, q = fma(cosTheta^2, -1, 1),
 *       b = 0.5 / (p * q),  s = b - (-0.5 * cosTheta^2) / (p * q^2).
 */
float code(float cosTheta, float alpha) {
    const float piLogAlpha = logf(alpha) * ((float) M_PI);
    const float oneMinusCosSq = fmaf(cosTheta * cosTheta, -1.0f, 1.0f);
    const float base = 0.5f / (piLogAlpha * oneMinusCosSq);
    const float correction = (-0.5f * (cosTheta * cosTheta)) / (piLogAlpha * powf(oneMinusCosSq, 2.0f));
    const float slope = base - correction;
    const float mixed = (powf(cosTheta * alpha, 2.0f) * slope) / oneMinusCosSq;
    return (fmaf(mixed, -1.0f, slope) * (alpha * alpha)) - base;
}
# Float32 evaluation of fma((cosTheta*alpha)^2 * t_3 / t_1, -1, t_3) * alpha^2 - t_2, where
#   t_0 = log(alpha) * pi
#   t_1 = fma(cosTheta^2, -1, 1)
#   t_2 = 0.5 / (t_0 * t_1)
#   t_3 = t_2 - (-0.5 * cosTheta^2) / (t_0 * t_1^2)
# Each statement is on its own line so the definition parses; the
# expressions are unchanged.
function code(cosTheta, alpha)
    t_0 = Float32(log(alpha) * Float32(pi))
    t_1 = fma(Float32(cosTheta * cosTheta), Float32(-1.0), Float32(1.0))
    t_2 = Float32(Float32(0.5) / Float32(t_0 * t_1))
    t_3 = Float32(t_2 - Float32(Float32(Float32(-0.5) * Float32(cosTheta * cosTheta)) / Float32(t_0 * (t_1 ^ Float32(2.0)))))
    return Float32(Float32(fma(Float32(Float32((Float32(cosTheta * alpha) ^ Float32(2.0)) * t_3) / t_1), Float32(-1.0), t_3) * Float32(alpha * alpha)) - t_2)
end
\begin{array}{l}
\\
\begin{array}{l}
t_0 := \log \alpha \cdot \pi\\
t_1 := \mathsf{fma}\left(cosTheta \cdot cosTheta, -1, 1\right)\\
t_2 := \frac{0.5}{t\_0 \cdot t\_1}\\
t_3 := t\_2 - \frac{-0.5 \cdot \left(cosTheta \cdot cosTheta\right)}{t\_0 \cdot {t\_1}^{2}}\\
\mathsf{fma}\left(\frac{{\left(cosTheta \cdot \alpha\right)}^{2} \cdot t\_3}{t\_1}, -1, t\_3\right) \cdot \left(\alpha \cdot \alpha\right) - t\_2
\end{array}
\end{array}
Initial program 98.6%
Taylor expanded in alpha around 0
Applied rewrites 97.9%
herbie shell --seed 2025057
; Benchmark definition named "GTR1 distribution", evaluated in binary32.
; Precondition: 0 <= cosTheta <= 1 and 0.0001 <= alpha <= 1.
; Body: (alpha*alpha - 1) / ((PI * log(alpha*alpha)) * (1 + ((alpha*alpha - 1) * cosTheta) * cosTheta)).
; NOTE(review): the precondition admits alpha = 1, where both the numerator
; (alpha*alpha - 1) and log(alpha*alpha) are 0, i.e. the quotient is 0/0 -
; confirm whether that endpoint is intended to be sampled.
(FPCore (cosTheta alpha)
:name "GTR1 distribution"
:precision binary32
:pre (and (and (<= 0.0 cosTheta) (<= cosTheta 1.0)) (and (<= 0.0001 alpha) (<= alpha 1.0)))
(/ (- (* alpha alpha) 1.0) (* (* PI (log (* alpha alpha))) (+ 1.0 (* (* (- (* alpha alpha) 1.0) cosTheta) cosTheta)))))