Beckmann Distribution sample, tan2theta, alphax == alphay

Percentage Accurate: 56.1% → 99.0%
Time: 27.7s
Alternatives: 11
Speedup: 2.4×

Specification

?
\[\left(0.0001 \leq \alpha \land \alpha \leq 1\right) \land \left(2.328306437 \cdot 10^{-10} \leq u0 \land u0 \leq 1\right)\]
\[\left(\left(-\alpha\right) \cdot \alpha\right) \cdot \log \left(1 - u0\right) \]
(FPCore (alpha u0)
  :precision binary32
  :pre (and (and (<= 0.0001 alpha) (<= alpha 1.0))
     (and (<= 2.328306437e-10 u0) (<= u0 1.0)))
  (* (* (- alpha) alpha) (log (- 1.0 u0))))
float code(float alpha, float u0) {
	return (-alpha * alpha) * logf((1.0f - u0));
}
real(4) function code(alpha, u0)
use fmin_fmax_functions
    real(4), intent (in) :: alpha
    real(4), intent (in) :: u0
    ! -alpha**2 * log(1 - u0) in single precision; loses accuracy for small u0
    code = (-alpha * alpha) * log((1.0e0 - u0))
end function
function code(alpha, u0)
	# -alpha^2 * log(1 - u0); Float32 wrappers force binary32 rounding at each step
	return Float32(Float32(Float32(-alpha) * alpha) * log(Float32(Float32(1.0) - u0)))
end
function tmp = code(alpha, u0)
	% -alpha^2 * log(1 - u0) in single precision
	tmp = (-alpha * alpha) * log((single(1.0) - u0));
end
\left(\left(-\alpha\right) \cdot \alpha\right) \cdot \log \left(1 - u0\right)

Local Percentage Accuracy vs ?

The average percentage accuracy by input value. The horizontal axis shows the value of an input variable; the variable is chosen in the title. The vertical axis is accuracy; higher is better. Red represents the original program, while blue represents Herbie's suggestion. These can be toggled with buttons below the plot. The line is an average, while dots represent individual samples.

Accuracy vs Speed?

Herbie found 11 alternatives:

AlternativeAccuracySpeedup
The accuracy (vertical axis) and speed (horizontal axis) of each alternative. Up and to the right is better. The red square shows the initial program, and each blue circle shows an alternative. The line shows the best available speed-accuracy tradeoffs.

Initial Program: 56.1% accurate, 1.0× speedup?

\[\left(0.0001 \leq \alpha \land \alpha \leq 1\right) \land \left(2.328306437 \cdot 10^{-10} \leq u0 \land u0 \leq 1\right)\]
\[\left(\left(-\alpha\right) \cdot \alpha\right) \cdot \log \left(1 - u0\right) \]
(FPCore (alpha u0)
  :precision binary32
  :pre (and (and (<= 0.0001 alpha) (<= alpha 1.0))
     (and (<= 2.328306437e-10 u0) (<= u0 1.0)))
  (* (* (- alpha) alpha) (log (- 1.0 u0))))
float code(float alpha, float u0) {
	return (-alpha * alpha) * logf((1.0f - u0));
}
real(4) function code(alpha, u0)
use fmin_fmax_functions
    real(4), intent (in) :: alpha
    real(4), intent (in) :: u0
    ! Initial program: -alpha**2 * log(1 - u0), single precision
    code = (-alpha * alpha) * log((1.0e0 - u0))
end function
function code(alpha, u0)
	# Initial program: -alpha^2 * log(1 - u0), rounded to Float32 at each operation
	return Float32(Float32(Float32(-alpha) * alpha) * log(Float32(Float32(1.0) - u0)))
end
function tmp = code(alpha, u0)
	% Initial program: -alpha^2 * log(1 - u0), single precision
	tmp = (-alpha * alpha) * log((single(1.0) - u0));
end
\left(\left(-\alpha\right) \cdot \alpha\right) \cdot \log \left(1 - u0\right)

Alternative 1: 99.0% accurate, 0.9× speedup?

\[\left(0.0001 \leq \alpha \land \alpha \leq 1\right) \land \left(2.328306437 \cdot 10^{-10} \leq u0 \land u0 \leq 1\right)\]
\[\alpha \cdot \left(\mathsf{log1p}\left(-u0\right) \cdot \left(-\alpha\right)\right) \]
(FPCore (alpha u0)
  :precision binary32
  :pre (and (and (<= 0.0001 alpha) (<= alpha 1.0))
     (and (<= 2.328306437e-10 u0) (<= u0 1.0)))
  (* alpha (* (log1p (- u0)) (- alpha))))
float code(float alpha, float u0) {
	return alpha * (log1pf(-u0) * -alpha);
}
function code(alpha, u0)
	# alpha * (log1p(-u0) * -alpha): log1p avoids cancellation for small u0
	return Float32(alpha * Float32(log1p(Float32(-u0)) * Float32(-alpha)))
end
\alpha \cdot \left(\mathsf{log1p}\left(-u0\right) \cdot \left(-\alpha\right)\right)
Derivation
  1. Initial program 56.1%

    \[\left(\left(-\alpha\right) \cdot \alpha\right) \cdot \log \left(1 - u0\right) \]
  2. Applied rewrites56.1%

    \[\leadsto \alpha \cdot \left(\log \left(1 - u0\right) \cdot \left(-\alpha\right)\right) \]
  3. Applied rewrites99.0%

    \[\leadsto \alpha \cdot \left(\mathsf{log1p}\left(-u0\right) \cdot \left(-\alpha\right)\right) \]
  4. Add Preprocessing

Alternative 2: 96.8% accurate, 0.7× speedup?

\[\left(0.0001 \leq \alpha \land \alpha \leq 1\right) \land \left(2.328306437 \cdot 10^{-10} \leq u0 \land u0 \leq 1\right)\]
\[\begin{array}{l} \mathbf{if}\;1 - u0 \leq 0.9980000257492065:\\ \;\;\;\;\left(\left(-\alpha\right) \cdot \alpha\right) \cdot \log \left(1 - u0\right)\\ \mathbf{else}:\\ \;\;\;\;\alpha \cdot \mathsf{fma}\left(\alpha, u0, \left(u0 \cdot 0.5\right) \cdot \left(\alpha \cdot u0\right)\right)\\ \end{array} \]
(FPCore (alpha u0)
  :precision binary32
  :pre (and (and (<= 0.0001 alpha) (<= alpha 1.0))
     (and (<= 2.328306437e-10 u0) (<= u0 1.0)))
  (if (<= (- 1.0 u0) 0.9980000257492065)
  (* (* (- alpha) alpha) (log (- 1.0 u0)))
  (* alpha (fma alpha u0 (* (* u0 0.5) (* alpha u0))))))
float code(float alpha, float u0) {
	float tmp;
	if ((1.0f - u0) <= 0.9980000257492065f) {
		tmp = (-alpha * alpha) * logf((1.0f - u0));
	} else {
		tmp = alpha * fmaf(alpha, u0, ((u0 * 0.5f) * (alpha * u0)));
	}
	return tmp;
}
function code(alpha, u0)
	# Two regimes: log form for 1 - u0 <= ~0.998, Taylor/fma form for u0 near 0
	tmp = Float32(0.0)
	if (Float32(Float32(1.0) - u0) <= Float32(0.9980000257492065))
		tmp = Float32(Float32(Float32(-alpha) * alpha) * log(Float32(Float32(1.0) - u0)));
	else
		tmp = Float32(alpha * fma(alpha, u0, Float32(Float32(u0 * Float32(0.5)) * Float32(alpha * u0))));
	end
	return tmp
end
\begin{array}{l}
\mathbf{if}\;1 - u0 \leq 0.9980000257492065:\\
\;\;\;\;\left(\left(-\alpha\right) \cdot \alpha\right) \cdot \log \left(1 - u0\right)\\

\mathbf{else}:\\
\;\;\;\;\alpha \cdot \mathsf{fma}\left(\alpha, u0, \left(u0 \cdot 0.5\right) \cdot \left(\alpha \cdot u0\right)\right)\\


\end{array}
Derivation
  1. Split input into 2 regimes
  2. if (-.f32 #s(literal 1 binary32) u0) < 0.998000026

    1. Initial program 56.1%

      \[\left(\left(-\alpha\right) \cdot \alpha\right) \cdot \log \left(1 - u0\right) \]

    if 0.998000026 < (-.f32 #s(literal 1 binary32) u0)

    1. Initial program 56.1%

      \[\left(\left(-\alpha\right) \cdot \alpha\right) \cdot \log \left(1 - u0\right) \]
    2. Applied rewrites56.1%

      \[\leadsto \alpha \cdot \left(\log \left(1 - u0\right) \cdot \left(-\alpha\right)\right) \]
    3. Taylor expanded in u0 around 0

      \[\leadsto \alpha \cdot \left(u0 \cdot \left(\alpha + \frac{1}{2} \cdot \left(\alpha \cdot u0\right)\right)\right) \]
    4. Applied rewrites87.0%

      \[\leadsto \alpha \cdot \left(u0 \cdot \left(\alpha + 0.5 \cdot \left(\alpha \cdot u0\right)\right)\right) \]
    5. Applied rewrites87.1%

      \[\leadsto \alpha \cdot \mathsf{fma}\left(\alpha, u0, \left(u0 \cdot 0.5\right) \cdot \left(\alpha \cdot u0\right)\right) \]
  3. Recombined 2 regimes into one program.
  4. Add Preprocessing

Alternative 3: 96.7% accurate, 0.7× speedup?

\[\left(0.0001 \leq \alpha \land \alpha \leq 1\right) \land \left(2.328306437 \cdot 10^{-10} \leq u0 \land u0 \leq 1\right)\]
\[\begin{array}{l} \mathbf{if}\;1 - u0 \leq 0.9980000257492065:\\ \;\;\;\;\left(\left(-\alpha\right) \cdot \alpha\right) \cdot \log \left(1 - u0\right)\\ \mathbf{else}:\\ \;\;\;\;u0 \cdot \mathsf{fma}\left(\alpha, \alpha, \left(\alpha \cdot u0\right) \cdot \left(\alpha \cdot 0.5\right)\right)\\ \end{array} \]
(FPCore (alpha u0)
  :precision binary32
  :pre (and (and (<= 0.0001 alpha) (<= alpha 1.0))
     (and (<= 2.328306437e-10 u0) (<= u0 1.0)))
  (if (<= (- 1.0 u0) 0.9980000257492065)
  (* (* (- alpha) alpha) (log (- 1.0 u0)))
  (* u0 (fma alpha alpha (* (* alpha u0) (* alpha 0.5))))))
float code(float alpha, float u0) {
	float tmp;
	if ((1.0f - u0) <= 0.9980000257492065f) {
		tmp = (-alpha * alpha) * logf((1.0f - u0));
	} else {
		tmp = u0 * fmaf(alpha, alpha, ((alpha * u0) * (alpha * 0.5f)));
	}
	return tmp;
}
function code(alpha, u0)
	# Two regimes: log form for 1 - u0 <= ~0.998, else u0 * fma(alpha, alpha, ...)
	tmp = Float32(0.0)
	if (Float32(Float32(1.0) - u0) <= Float32(0.9980000257492065))
		tmp = Float32(Float32(Float32(-alpha) * alpha) * log(Float32(Float32(1.0) - u0)));
	else
		tmp = Float32(u0 * fma(alpha, alpha, Float32(Float32(alpha * u0) * Float32(alpha * Float32(0.5)))));
	end
	return tmp
end
\begin{array}{l}
\mathbf{if}\;1 - u0 \leq 0.9980000257492065:\\
\;\;\;\;\left(\left(-\alpha\right) \cdot \alpha\right) \cdot \log \left(1 - u0\right)\\

\mathbf{else}:\\
\;\;\;\;u0 \cdot \mathsf{fma}\left(\alpha, \alpha, \left(\alpha \cdot u0\right) \cdot \left(\alpha \cdot 0.5\right)\right)\\


\end{array}
Derivation
  1. Split input into 2 regimes
  2. if (-.f32 #s(literal 1 binary32) u0) < 0.998000026

    1. Initial program 56.1%

      \[\left(\left(-\alpha\right) \cdot \alpha\right) \cdot \log \left(1 - u0\right) \]

    if 0.998000026 < (-.f32 #s(literal 1 binary32) u0)

    1. Initial program 56.1%

      \[\left(\left(-\alpha\right) \cdot \alpha\right) \cdot \log \left(1 - u0\right) \]
    2. Taylor expanded in u0 around 0

      \[\leadsto u0 \cdot \left(\frac{1}{2} \cdot \left({\alpha}^{2} \cdot u0\right) + {\alpha}^{2}\right) \]
    3. Applied rewrites87.0%

      \[\leadsto u0 \cdot \mathsf{fma}\left(0.5, {\alpha}^{2} \cdot u0, {\alpha}^{2}\right) \]
    4. Applied rewrites87.1%

      \[\leadsto u0 \cdot \mathsf{fma}\left(\alpha, \alpha, \left(\alpha \cdot u0\right) \cdot \left(\alpha \cdot 0.5\right)\right) \]
  3. Recombined 2 regimes into one program.
  4. Add Preprocessing

Alternative 4: 96.6% accurate, 0.7× speedup?

\[\left(0.0001 \leq \alpha \land \alpha \leq 1\right) \land \left(2.328306437 \cdot 10^{-10} \leq u0 \land u0 \leq 1\right)\]
\[\begin{array}{l} \mathbf{if}\;1 - u0 \leq 0.9980000257492065:\\ \;\;\;\;\left(\left(-\alpha\right) \cdot \alpha\right) \cdot \log \left(1 - u0\right)\\ \mathbf{else}:\\ \;\;\;\;\left(\alpha \cdot \alpha\right) \cdot \mathsf{fma}\left(u0 \cdot u0, 0.5, u0\right)\\ \end{array} \]
(FPCore (alpha u0)
  :precision binary32
  :pre (and (and (<= 0.0001 alpha) (<= alpha 1.0))
     (and (<= 2.328306437e-10 u0) (<= u0 1.0)))
  (if (<= (- 1.0 u0) 0.9980000257492065)
  (* (* (- alpha) alpha) (log (- 1.0 u0)))
  (* (* alpha alpha) (fma (* u0 u0) 0.5 u0))))
float code(float alpha, float u0) {
	float tmp;
	if ((1.0f - u0) <= 0.9980000257492065f) {
		tmp = (-alpha * alpha) * logf((1.0f - u0));
	} else {
		tmp = (alpha * alpha) * fmaf((u0 * u0), 0.5f, u0);
	}
	return tmp;
}
function code(alpha, u0)
	# Two regimes: log form for 1 - u0 <= ~0.998, else alpha^2 * fma(u0^2, 0.5, u0)
	tmp = Float32(0.0)
	if (Float32(Float32(1.0) - u0) <= Float32(0.9980000257492065))
		tmp = Float32(Float32(Float32(-alpha) * alpha) * log(Float32(Float32(1.0) - u0)));
	else
		tmp = Float32(Float32(alpha * alpha) * fma(Float32(u0 * u0), Float32(0.5), u0));
	end
	return tmp
end
\begin{array}{l}
\mathbf{if}\;1 - u0 \leq 0.9980000257492065:\\
\;\;\;\;\left(\left(-\alpha\right) \cdot \alpha\right) \cdot \log \left(1 - u0\right)\\

\mathbf{else}:\\
\;\;\;\;\left(\alpha \cdot \alpha\right) \cdot \mathsf{fma}\left(u0 \cdot u0, 0.5, u0\right)\\


\end{array}
Derivation
  1. Split input into 2 regimes
  2. if (-.f32 #s(literal 1 binary32) u0) < 0.998000026

    1. Initial program 56.1%

      \[\left(\left(-\alpha\right) \cdot \alpha\right) \cdot \log \left(1 - u0\right) \]

    if 0.998000026 < (-.f32 #s(literal 1 binary32) u0)

    1. Initial program 56.1%

      \[\left(\left(-\alpha\right) \cdot \alpha\right) \cdot \log \left(1 - u0\right) \]
    2. Taylor expanded in u0 around 0

      \[\leadsto u0 \cdot \left(u0 \cdot \left(\frac{1}{3} \cdot \left({\alpha}^{2} \cdot u0\right) + \frac{1}{2} \cdot {\alpha}^{2}\right) + {\alpha}^{2}\right) \]
    3. Applied rewrites91.2%

      \[\leadsto u0 \cdot \mathsf{fma}\left(u0, \mathsf{fma}\left(0.3333333333333333, {\alpha}^{2} \cdot u0, 0.5 \cdot {\alpha}^{2}\right), {\alpha}^{2}\right) \]
    4. Applied rewrites91.3%

      \[\leadsto u0 \cdot \mathsf{fma}\left(\alpha, \alpha, \left(\left(\alpha \cdot \alpha\right) \cdot \mathsf{fma}\left(u0, 0.3333333333333333, 0.5\right)\right) \cdot u0\right) \]
    5. Applied rewrites91.2%

      \[\leadsto \left(\alpha \cdot \alpha\right) \cdot \mathsf{fma}\left(u0 \cdot u0, \mathsf{fma}\left(u0, 0.3333333333333333, 0.5\right), u0\right) \]
    6. Taylor expanded in u0 around 0

      \[\leadsto \left(\alpha \cdot \alpha\right) \cdot \mathsf{fma}\left(u0 \cdot u0, \frac{1}{2}, u0\right) \]
    7. Applied rewrites87.0%

      \[\leadsto \left(\alpha \cdot \alpha\right) \cdot \mathsf{fma}\left(u0 \cdot u0, 0.5, u0\right) \]
  3. Recombined 2 regimes into one program.
  4. Add Preprocessing

Alternative 5: 96.6% accurate, 0.7× speedup?

\[\left(0.0001 \leq \alpha \land \alpha \leq 1\right) \land \left(2.328306437 \cdot 10^{-10} \leq u0 \land u0 \leq 1\right)\]
\[\begin{array}{l} \mathbf{if}\;1 - u0 \leq 0.9980000257492065:\\ \;\;\;\;\alpha \cdot \left(\log \left(1 - u0\right) \cdot \left(-\alpha\right)\right)\\ \mathbf{else}:\\ \;\;\;\;\left(\alpha \cdot \alpha\right) \cdot \mathsf{fma}\left(u0 \cdot u0, 0.5, u0\right)\\ \end{array} \]
(FPCore (alpha u0)
  :precision binary32
  :pre (and (and (<= 0.0001 alpha) (<= alpha 1.0))
     (and (<= 2.328306437e-10 u0) (<= u0 1.0)))
  (if (<= (- 1.0 u0) 0.9980000257492065)
  (* alpha (* (log (- 1.0 u0)) (- alpha)))
  (* (* alpha alpha) (fma (* u0 u0) 0.5 u0))))
float code(float alpha, float u0) {
	float tmp;
	if ((1.0f - u0) <= 0.9980000257492065f) {
		tmp = alpha * (logf((1.0f - u0)) * -alpha);
	} else {
		tmp = (alpha * alpha) * fmaf((u0 * u0), 0.5f, u0);
	}
	return tmp;
}
function code(alpha, u0)
	# Two regimes: reassociated log form, else alpha^2 * fma(u0^2, 0.5, u0)
	tmp = Float32(0.0)
	if (Float32(Float32(1.0) - u0) <= Float32(0.9980000257492065))
		tmp = Float32(alpha * Float32(log(Float32(Float32(1.0) - u0)) * Float32(-alpha)));
	else
		tmp = Float32(Float32(alpha * alpha) * fma(Float32(u0 * u0), Float32(0.5), u0));
	end
	return tmp
end
\begin{array}{l}
\mathbf{if}\;1 - u0 \leq 0.9980000257492065:\\
\;\;\;\;\alpha \cdot \left(\log \left(1 - u0\right) \cdot \left(-\alpha\right)\right)\\

\mathbf{else}:\\
\;\;\;\;\left(\alpha \cdot \alpha\right) \cdot \mathsf{fma}\left(u0 \cdot u0, 0.5, u0\right)\\


\end{array}
Derivation
  1. Split input into 2 regimes
  2. if (-.f32 #s(literal 1 binary32) u0) < 0.998000026

    1. Initial program 56.1%

      \[\left(\left(-\alpha\right) \cdot \alpha\right) \cdot \log \left(1 - u0\right) \]
    2. Applied rewrites56.1%

      \[\leadsto \alpha \cdot \left(\log \left(1 - u0\right) \cdot \left(-\alpha\right)\right) \]

    if 0.998000026 < (-.f32 #s(literal 1 binary32) u0)

    1. Initial program 56.1%

      \[\left(\left(-\alpha\right) \cdot \alpha\right) \cdot \log \left(1 - u0\right) \]
    2. Taylor expanded in u0 around 0

      \[\leadsto u0 \cdot \left(u0 \cdot \left(\frac{1}{3} \cdot \left({\alpha}^{2} \cdot u0\right) + \frac{1}{2} \cdot {\alpha}^{2}\right) + {\alpha}^{2}\right) \]
    3. Applied rewrites91.2%

      \[\leadsto u0 \cdot \mathsf{fma}\left(u0, \mathsf{fma}\left(0.3333333333333333, {\alpha}^{2} \cdot u0, 0.5 \cdot {\alpha}^{2}\right), {\alpha}^{2}\right) \]
    4. Applied rewrites91.3%

      \[\leadsto u0 \cdot \mathsf{fma}\left(\alpha, \alpha, \left(\left(\alpha \cdot \alpha\right) \cdot \mathsf{fma}\left(u0, 0.3333333333333333, 0.5\right)\right) \cdot u0\right) \]
    5. Applied rewrites91.2%

      \[\leadsto \left(\alpha \cdot \alpha\right) \cdot \mathsf{fma}\left(u0 \cdot u0, \mathsf{fma}\left(u0, 0.3333333333333333, 0.5\right), u0\right) \]
    6. Taylor expanded in u0 around 0

      \[\leadsto \left(\alpha \cdot \alpha\right) \cdot \mathsf{fma}\left(u0 \cdot u0, \frac{1}{2}, u0\right) \]
    7. Applied rewrites87.0%

      \[\leadsto \left(\alpha \cdot \alpha\right) \cdot \mathsf{fma}\left(u0 \cdot u0, 0.5, u0\right) \]
  3. Recombined 2 regimes into one program.
  4. Add Preprocessing

Alternative 6: 87.0% accurate, 1.1× speedup?

\[\left(0.0001 \leq \alpha \land \alpha \leq 1\right) \land \left(2.328306437 \cdot 10^{-10} \leq u0 \land u0 \leq 1\right)\]
\[\left(\alpha \cdot \alpha\right) \cdot \mathsf{fma}\left(u0 \cdot u0, 0.5, u0\right) \]
(FPCore (alpha u0)
  :precision binary32
  :pre (and (and (<= 0.0001 alpha) (<= alpha 1.0))
     (and (<= 2.328306437e-10 u0) (<= u0 1.0)))
  (* (* alpha alpha) (fma (* u0 u0) 0.5 u0)))
float code(float alpha, float u0) {
	return (alpha * alpha) * fmaf((u0 * u0), 0.5f, u0);
}
function code(alpha, u0)
	# Taylor form: alpha^2 * fma(u0^2, 0.5, u0)
	return Float32(Float32(alpha * alpha) * fma(Float32(u0 * u0), Float32(0.5), u0))
end
\left(\alpha \cdot \alpha\right) \cdot \mathsf{fma}\left(u0 \cdot u0, 0.5, u0\right)
Derivation
  1. Initial program 56.1%

    \[\left(\left(-\alpha\right) \cdot \alpha\right) \cdot \log \left(1 - u0\right) \]
  2. Taylor expanded in u0 around 0

    \[\leadsto u0 \cdot \left(u0 \cdot \left(\frac{1}{3} \cdot \left({\alpha}^{2} \cdot u0\right) + \frac{1}{2} \cdot {\alpha}^{2}\right) + {\alpha}^{2}\right) \]
  3. Applied rewrites91.2%

    \[\leadsto u0 \cdot \mathsf{fma}\left(u0, \mathsf{fma}\left(0.3333333333333333, {\alpha}^{2} \cdot u0, 0.5 \cdot {\alpha}^{2}\right), {\alpha}^{2}\right) \]
  4. Applied rewrites91.3%

    \[\leadsto u0 \cdot \mathsf{fma}\left(\alpha, \alpha, \left(\left(\alpha \cdot \alpha\right) \cdot \mathsf{fma}\left(u0, 0.3333333333333333, 0.5\right)\right) \cdot u0\right) \]
  5. Applied rewrites91.2%

    \[\leadsto \left(\alpha \cdot \alpha\right) \cdot \mathsf{fma}\left(u0 \cdot u0, \mathsf{fma}\left(u0, 0.3333333333333333, 0.5\right), u0\right) \]
  6. Taylor expanded in u0 around 0

    \[\leadsto \left(\alpha \cdot \alpha\right) \cdot \mathsf{fma}\left(u0 \cdot u0, \frac{1}{2}, u0\right) \]
  7. Applied rewrites87.0%

    \[\leadsto \left(\alpha \cdot \alpha\right) \cdot \mathsf{fma}\left(u0 \cdot u0, 0.5, u0\right) \]
  8. Add Preprocessing

Alternative 7: 87.0% accurate, 1.1× speedup?

\[\left(0.0001 \leq \alpha \land \alpha \leq 1\right) \land \left(2.328306437 \cdot 10^{-10} \leq u0 \land u0 \leq 1\right)\]
\[u0 \cdot \left(\alpha \cdot \mathsf{fma}\left(0.5, \alpha \cdot u0, \alpha\right)\right) \]
(FPCore (alpha u0)
  :precision binary32
  :pre (and (and (<= 0.0001 alpha) (<= alpha 1.0))
     (and (<= 2.328306437e-10 u0) (<= u0 1.0)))
  (* u0 (* alpha (fma 0.5 (* alpha u0) alpha))))
float code(float alpha, float u0) {
	return u0 * (alpha * fmaf(0.5f, (alpha * u0), alpha));
}
function code(alpha, u0)
	# Taylor form: u0 * (alpha * fma(0.5, alpha * u0, alpha))
	return Float32(u0 * Float32(alpha * fma(Float32(0.5), Float32(alpha * u0), alpha)))
end
u0 \cdot \left(\alpha \cdot \mathsf{fma}\left(0.5, \alpha \cdot u0, \alpha\right)\right)
Derivation
  1. Initial program 56.1%

    \[\left(\left(-\alpha\right) \cdot \alpha\right) \cdot \log \left(1 - u0\right) \]
  2. Taylor expanded in u0 around 0

    \[\leadsto u0 \cdot \left(\frac{1}{2} \cdot \left({\alpha}^{2} \cdot u0\right) + {\alpha}^{2}\right) \]
  3. Applied rewrites87.0%

    \[\leadsto u0 \cdot \mathsf{fma}\left(0.5, {\alpha}^{2} \cdot u0, {\alpha}^{2}\right) \]
  4. Applied rewrites87.0%

    \[\leadsto u0 \cdot \mathsf{fma}\left(u0, 0.5 \cdot \left(\alpha \cdot \alpha\right), \alpha \cdot \alpha\right) \]
  5. Applied rewrites86.8%

    \[\leadsto u0 \cdot \left(\alpha \cdot \left(\mathsf{fma}\left(u0, 0.5, 1\right) \cdot \alpha\right)\right) \]
  6. Applied rewrites87.0%

    \[\leadsto u0 \cdot \left(\alpha \cdot \mathsf{fma}\left(0.5, \alpha \cdot u0, \alpha\right)\right) \]
  7. Add Preprocessing

Alternative 8: 86.8% accurate, 1.1× speedup?

\[\left(0.0001 \leq \alpha \land \alpha \leq 1\right) \land \left(2.328306437 \cdot 10^{-10} \leq u0 \land u0 \leq 1\right)\]
\[u0 \cdot \left(\alpha \cdot \left(\mathsf{fma}\left(u0, 0.5, 1\right) \cdot \alpha\right)\right) \]
(FPCore (alpha u0)
  :precision binary32
  :pre (and (and (<= 0.0001 alpha) (<= alpha 1.0))
     (and (<= 2.328306437e-10 u0) (<= u0 1.0)))
  (* u0 (* alpha (* (fma u0 0.5 1.0) alpha))))
float code(float alpha, float u0) {
	return u0 * (alpha * (fmaf(u0, 0.5f, 1.0f) * alpha));
}
function code(alpha, u0)
	# Taylor form: u0 * (alpha * (fma(u0, 0.5, 1) * alpha))
	return Float32(u0 * Float32(alpha * Float32(fma(u0, Float32(0.5), Float32(1.0)) * alpha)))
end
u0 \cdot \left(\alpha \cdot \left(\mathsf{fma}\left(u0, 0.5, 1\right) \cdot \alpha\right)\right)
Derivation
  1. Initial program 56.1%

    \[\left(\left(-\alpha\right) \cdot \alpha\right) \cdot \log \left(1 - u0\right) \]
  2. Taylor expanded in u0 around 0

    \[\leadsto u0 \cdot \left(\frac{1}{2} \cdot \left({\alpha}^{2} \cdot u0\right) + {\alpha}^{2}\right) \]
  3. Applied rewrites87.0%

    \[\leadsto u0 \cdot \mathsf{fma}\left(0.5, {\alpha}^{2} \cdot u0, {\alpha}^{2}\right) \]
  4. Applied rewrites87.0%

    \[\leadsto u0 \cdot \mathsf{fma}\left(u0, 0.5 \cdot \left(\alpha \cdot \alpha\right), \alpha \cdot \alpha\right) \]
  5. Applied rewrites86.8%

    \[\leadsto u0 \cdot \left(\alpha \cdot \left(\mathsf{fma}\left(u0, 0.5, 1\right) \cdot \alpha\right)\right) \]
  6. Add Preprocessing

Alternative 9: 86.8% accurate, 1.1× speedup?

\[\left(0.0001 \leq \alpha \land \alpha \leq 1\right) \land \left(2.328306437 \cdot 10^{-10} \leq u0 \land u0 \leq 1\right)\]
\[\alpha \cdot \left(\left(\alpha \cdot u0\right) \cdot \mathsf{fma}\left(u0, 0.5, 1\right)\right) \]
(FPCore (alpha u0)
  :precision binary32
  :pre (and (and (<= 0.0001 alpha) (<= alpha 1.0))
     (and (<= 2.328306437e-10 u0) (<= u0 1.0)))
  (* alpha (* (* alpha u0) (fma u0 0.5 1.0))))
float code(float alpha, float u0) {
	return alpha * ((alpha * u0) * fmaf(u0, 0.5f, 1.0f));
}
function code(alpha, u0)
	# Taylor form: alpha * ((alpha * u0) * fma(u0, 0.5, 1))
	return Float32(alpha * Float32(Float32(alpha * u0) * fma(u0, Float32(0.5), Float32(1.0))))
end
\alpha \cdot \left(\left(\alpha \cdot u0\right) \cdot \mathsf{fma}\left(u0, 0.5, 1\right)\right)
Derivation
  1. Initial program 56.1%

    \[\left(\left(-\alpha\right) \cdot \alpha\right) \cdot \log \left(1 - u0\right) \]
  2. Taylor expanded in u0 around 0

    \[\leadsto u0 \cdot \left(\frac{1}{2} \cdot \left({\alpha}^{2} \cdot u0\right) + {\alpha}^{2}\right) \]
  3. Applied rewrites87.0%

    \[\leadsto u0 \cdot \mathsf{fma}\left(0.5, {\alpha}^{2} \cdot u0, {\alpha}^{2}\right) \]
  4. Applied rewrites87.1%

    \[\leadsto \mathsf{fma}\left(\alpha, \alpha \cdot u0, \left(\left(\alpha \cdot u0\right) \cdot \left(\alpha \cdot 0.5\right)\right) \cdot u0\right) \]
  5. Applied rewrites86.8%

    \[\leadsto \alpha \cdot \left(\left(\alpha \cdot u0\right) \cdot \mathsf{fma}\left(u0, 0.5, 1\right)\right) \]
  6. Add Preprocessing

Alternative 10: 74.3% accurate, 1.8× speedup?

\[\left(0.0001 \leq \alpha \land \alpha \leq 1\right) \land \left(2.328306437 \cdot 10^{-10} \leq u0 \land u0 \leq 1\right)\]
\[\left(\left(-\alpha\right) \cdot \alpha\right) \cdot \left(-u0\right) \]
(FPCore (alpha u0)
  :precision binary32
  :pre (and (and (<= 0.0001 alpha) (<= alpha 1.0))
     (and (<= 2.328306437e-10 u0) (<= u0 1.0)))
  (* (* (- alpha) alpha) (- u0)))
/* First-order Taylor approximation: (-alpha^2) * (-u0) == alpha^2 * u0. */
float code(float alpha, float u0) {
	float neg_a2 = -alpha * alpha;
	return neg_a2 * -u0;
}
real(4) function code(alpha, u0)
use fmin_fmax_functions
    real(4), intent (in) :: alpha
    real(4), intent (in) :: u0
    ! First-order Taylor approximation: (-alpha**2) * (-u0) == alpha**2 * u0
    code = (-alpha * alpha) * -u0
end function
function code(alpha, u0)
	# First-order Taylor approximation: (-alpha^2) * (-u0)
	return Float32(Float32(Float32(-alpha) * alpha) * Float32(-u0))
end
function tmp = code(alpha, u0)
	% First-order Taylor approximation: (-alpha^2) * (-u0)
	tmp = (-alpha * alpha) * -u0;
end
\left(\left(-\alpha\right) \cdot \alpha\right) \cdot \left(-u0\right)
Derivation
  1. Initial program 56.1%

    \[\left(\left(-\alpha\right) \cdot \alpha\right) \cdot \log \left(1 - u0\right) \]
  2. Taylor expanded in u0 around 0

    \[\leadsto \left(\left(-\alpha\right) \cdot \alpha\right) \cdot \left(-1 \cdot u0\right) \]
  3. Applied rewrites74.3%

    \[\leadsto \left(\left(-\alpha\right) \cdot \alpha\right) \cdot \left(-1 \cdot u0\right) \]
  4. Applied rewrites74.3%

    \[\leadsto \left(\left(-\alpha\right) \cdot \alpha\right) \cdot \left(-u0\right) \]
  5. Add Preprocessing

Alternative 11: 74.3% accurate, 2.4× speedup?

\[\left(0.0001 \leq \alpha \land \alpha \leq 1\right) \land \left(2.328306437 \cdot 10^{-10} \leq u0 \land u0 \leq 1\right)\]
\[\alpha \cdot \left(\alpha \cdot u0\right) \]
(FPCore (alpha u0)
  :precision binary32
  :pre (and (and (<= 0.0001 alpha) (<= alpha 1.0))
     (and (<= 2.328306437e-10 u0) (<= u0 1.0)))
  (* alpha (* alpha u0)))
/* First-order Taylor approximation without negations: alpha * (alpha * u0). */
float code(float alpha, float u0) {
	float au0 = alpha * u0;
	return alpha * au0;
}
real(4) function code(alpha, u0)
use fmin_fmax_functions
    real(4), intent (in) :: alpha
    real(4), intent (in) :: u0
    ! First-order Taylor approximation: alpha**2 * u0
    code = alpha * (alpha * u0)
end function
function code(alpha, u0)
	# First-order Taylor approximation: alpha * (alpha * u0)
	return Float32(alpha * Float32(alpha * u0))
end
function tmp = code(alpha, u0)
	% First-order Taylor approximation: alpha^2 * u0
	tmp = alpha * (alpha * u0);
end
\alpha \cdot \left(\alpha \cdot u0\right)
Derivation
  1. Initial program 56.1%

    \[\left(\left(-\alpha\right) \cdot \alpha\right) \cdot \log \left(1 - u0\right) \]
  2. Applied rewrites56.1%

    \[\leadsto \alpha \cdot \left(\log \left(1 - u0\right) \cdot \left(-\alpha\right)\right) \]
  3. Taylor expanded in u0 around 0

    \[\leadsto \alpha \cdot \left(\alpha \cdot u0\right) \]
  4. Applied rewrites74.3%

    \[\leadsto \alpha \cdot \left(\alpha \cdot u0\right) \]
  5. Add Preprocessing

Reproduce

?
herbie shell --seed 2026089 +o generate:egglog
(FPCore (alpha u0)
  :name "Beckmann Distribution sample, tan2theta, alphax == alphay"
  :precision binary32
  :pre (and (and (<= 0.0001 alpha) (<= alpha 1.0)) (and (<= 2.328306437e-10 u0) (<= u0 1.0)))
  (* (* (- alpha) alpha) (log (- 1.0 u0))))