
; Initial program: binds t_1 = cos(th) / sqrt(2) once and returns t_1 * (a1 * a1) + t_1 * (a2 * a2) in binary64.
(FPCore (a1 a2 th) :precision binary64 (let* ((t_1 (/ (cos th) (sqrt 2.0)))) (+ (* t_1 (* a1 a1)) (* t_1 (* a2 a2)))))
double code(double a1, double a2, double th) {
double t_1 = cos(th) / sqrt(2.0);
return (t_1 * (a1 * a1)) + (t_1 * (a2 * a2));
}
! Evaluates (cos(th) / sqrt(2)) * a1**2 + (cos(th) / sqrt(2)) * a2**2.
! The common factor is computed once and multiplied into each square
! before the two products are summed.
function code(a1, a2, th) result(total)
    implicit none
    real(8), intent(in) :: a1, a2, th
    real(8) :: total
    real(8) :: scale
    scale = cos(th) / sqrt(2.0d0)
    total = (scale * (a1 * a1)) + (scale * (a2 * a2))
end function code
/**
 * Evaluates (cos(th) / sqrt(2)) * a1^2 + (cos(th) / sqrt(2)) * a2^2.
 *
 * @param a1 first value to be squared
 * @param a2 second value to be squared
 * @param th argument passed to {@link Math#cos(double)}
 * @return the sum of the two scaled squares
 */
public static double code(double a1, double a2, double th) {
    final double scale = Math.cos(th) / Math.sqrt(2.0);
    final double firstTerm = scale * (a1 * a1);
    final double secondTerm = scale * (a2 * a2);
    return firstTerm + secondTerm;
}
def code(a1, a2, th):
    """Return (cos(th) / sqrt(2)) * a1**2 + (cos(th) / sqrt(2)) * a2**2.

    Args:
        a1: first value to be squared.
        a2: second value to be squared.
        th: argument passed to ``math.cos``.

    Returns:
        The sum of the two squares, each scaled by ``cos(th) / sqrt(2)``.
    """
    # The shared factor is computed once and applied to each square separately.
    t_1 = math.cos(th) / math.sqrt(2.0)
    return (t_1 * (a1 * a1)) + (t_1 * (a2 * a2))
# Returns (cos(th) / sqrt(2)) * a1^2 + (cos(th) / sqrt(2)) * a2^2.
# Every intermediate is wrapped in Float64(...), as in the generated original,
# and the shared factor t_1 is computed once.
function code(a1, a2, th)
    t_1 = Float64(cos(th) / sqrt(2.0))
    return Float64(Float64(t_1 * Float64(a1 * a1)) + Float64(t_1 * Float64(a2 * a2)))
end
function tmp = code(a1, a2, th)
    % CODE  Returns (cos(th) / sqrt(2)) * a1^2 + (cos(th) / sqrt(2)) * a2^2.
    %   The shared factor t_1 = cos(th) / sqrt(2) is computed once and
    %   multiplied into each square before the products are added.
    t_1 = cos(th) / sqrt(2.0);
    tmp = (t_1 * (a1 * a1)) + (t_1 * (a2 * a2));
end
(* code[a1, a2, th]: binds t$95$1 to Cos[th] / Sqrt[2.0] and returns t$95$1 * (a1 * a1) + t$95$1 * (a2 * a2); every intermediate is wrapped in N[..., $MachinePrecision]. *)
code[a1_, a2_, th_] := Block[{t$95$1 = N[(N[Cos[th], $MachinePrecision] / N[Sqrt[2.0], $MachinePrecision]), $MachinePrecision]}, N[(N[(t$95$1 * N[(a1 * a1), $MachinePrecision]), $MachinePrecision] + N[(t$95$1 * N[(a2 * a2), $MachinePrecision]), $MachinePrecision]), $MachinePrecision]]
\begin{array}{l}
\\
\begin{array}{l}
t_1 := \frac{\cos th}{\sqrt{2}}\\
t_1 \cdot \left(a1 \cdot a1\right) + t_1 \cdot \left(a2 \cdot a2\right)
\end{array}
\end{array}
Sampling outcomes in binary64 precision:
Herbie found 12 alternatives:
| Alternative | Accuracy | Speedup |
|---|---|---|
; Alternative identical to the initial program: t_1 = cos(th) / sqrt(2), result = t_1 * (a1 * a1) + t_1 * (a2 * a2).
(FPCore (a1 a2 th) :precision binary64 (let* ((t_1 (/ (cos th) (sqrt 2.0)))) (+ (* t_1 (* a1 a1)) (* t_1 (* a2 a2)))))
double code(double a1, double a2, double th) {
double t_1 = cos(th) / sqrt(2.0);
return (t_1 * (a1 * a1)) + (t_1 * (a2 * a2));
}
! Sum of a1 squared and a2 squared, each multiplied by cos(th) / sqrt(2).
real(8) function code(a1, a2, th)
    real(8), intent(in) :: a1, a2, th
    real(8) :: factor, sq1, sq2
    sq1 = a1 * a1
    sq2 = a2 * a2
    factor = cos(th) / sqrt(2.0d0)
    code = (factor * sq1) + (factor * sq2)
end function code
/**
 * Sum of {@code a1} squared and {@code a2} squared, each multiplied by
 * {@code cos(th) / sqrt(2)}.
 *
 * @param a1 first value to be squared
 * @param a2 second value to be squared
 * @param th argument of the cosine
 * @return {@code f * (a1 * a1) + f * (a2 * a2)} with {@code f = cos(th) / sqrt(2)}
 */
public static double code(double a1, double a2, double th) {
    double a1Squared = a1 * a1;
    double a2Squared = a2 * a2;
    double cosOverRoot2 = Math.cos(th) / Math.sqrt(2.0);
    return (cosOverRoot2 * a1Squared) + (cosOverRoot2 * a2Squared);
}
def code(a1, a2, th):
    """Scale a1**2 and a2**2 by cos(th) / sqrt(2) and return their sum.

    Args:
        a1: first value to be squared.
        a2: second value to be squared.
        th: argument of the cosine.

    Returns:
        ``t_1 * (a1 * a1) + t_1 * (a2 * a2)`` with ``t_1 = cos(th) / sqrt(2)``.
    """
    t_1 = math.cos(th) / math.sqrt(2.0)
    return (t_1 * (a1 * a1)) + (t_1 * (a2 * a2))
# Scales a1^2 and a2^2 by cos(th) / sqrt(2) and returns their sum.
# The Float64(...) wrappers around each intermediate are kept from the
# generated original.
function code(a1, a2, th)
    t_1 = Float64(cos(th) / sqrt(2.0))
    return Float64(Float64(t_1 * Float64(a1 * a1)) + Float64(t_1 * Float64(a2 * a2)))
end
function tmp = code(a1, a2, th)
    % CODE  Scales a1^2 and a2^2 by cos(th) / sqrt(2) and returns their sum.
    t_1 = cos(th) / sqrt(2.0);
    tmp = (t_1 * (a1 * a1)) + (t_1 * (a2 * a2));
end
(* code[a1, a2, th]: t$95$1 holds Cos[th] / Sqrt[2.0]; the result is t$95$1 * (a1 * a1) + t$95$1 * (a2 * a2), with N[..., $MachinePrecision] applied around each operation. *)
code[a1_, a2_, th_] := Block[{t$95$1 = N[(N[Cos[th], $MachinePrecision] / N[Sqrt[2.0], $MachinePrecision]), $MachinePrecision]}, N[(N[(t$95$1 * N[(a1 * a1), $MachinePrecision]), $MachinePrecision] + N[(t$95$1 * N[(a2 * a2), $MachinePrecision]), $MachinePrecision]), $MachinePrecision]]
\begin{array}{l}
\\
\begin{array}{l}
t_1 := \frac{\cos th}{\sqrt{2}}\\
t_1 \cdot \left(a1 \cdot a1\right) + t_1 \cdot \left(a2 \cdot a2\right)
\end{array}
\end{array}
NOTE: a1 and a2 should be sorted in increasing order before calling this function.
(FPCore (a1 a2 th) :precision binary64 (* (* (* 0.5 (fma a2 a2 (* a1 a1))) (sqrt 2.0)) (cos th)))
assert(a1 < a2 && a2 < th);
double code(double a1, double a2, double th) {
return ((0.5 * fma(a2, a2, (a1 * a1))) * sqrt(2.0)) * cos(th);
}
# Herbie alternative: ((0.5 * fma(a2, a2, a1 * a1)) * sqrt(2)) * cos(th).
#
# The report asks for the inputs to be in increasing order. Only a1 and a2 are
# interchangeable here; th is the cosine argument and must not be reordered
# with them. The ordering is applied to a1 and a2 inside the function rather
# than by a top-level sort over all three values.
function code(a1, a2, th)
    a1, a2 = minmax(a1, a2)
    return Float64(Float64(Float64(0.5 * fma(a2, a2, Float64(a1 * a1))) * sqrt(2.0)) * cos(th))
end
(* NOTE: a1 and a2 should be sorted in increasing order before calling this function; th is the cosine argument and is not part of that ordering. *)
(* code[a1, a2, th]: ((0.5 * (a2 * a2 + a1 * a1)) * Sqrt[2.0]) * Cos[th], with N[..., $MachinePrecision] applied around each operation. *)
code[a1_, a2_, th_] := N[(N[(N[(0.5 * N[(a2 * a2 + N[(a1 * a1), $MachinePrecision]), $MachinePrecision]), $MachinePrecision] * N[Sqrt[2.0], $MachinePrecision]), $MachinePrecision] * N[Cos[th], $MachinePrecision]), $MachinePrecision]
\begin{array}{l}
[a1, a2] = \mathsf{sort}([a1, a2])\\
\\
\left(\left(0.5 \cdot \mathsf{fma}\left(a2, a2, a1 \cdot a1\right)\right) \cdot \sqrt{2}\right) \cdot \cos th
\end{array}
Initial program 99.6%
lift-+.f64N/A
lift-*.f64N/A
lift-/.f64N/A
associate-*l/N/A
lift-*.f64N/A
lift-/.f64N/A
associate-*l/N/A
frac-addN/A
lift-sqrt.f64N/A
lift-sqrt.f64N/A
rem-square-sqrtN/A
div-invN/A
metadata-evalN/A
lower-*.f64N/A
Applied rewrites99.6%
Taylor expanded in th around inf
*-commutativeN/A
distribute-rgt-outN/A
associate-*l*N/A
lower-*.f64N/A
*-commutativeN/A
lower-*.f64N/A
lower-sqrt.f64N/A
lower-cos.f64N/A
lower-*.f64N/A
unpow2N/A
lower-fma.f64N/A
unpow2N/A
lower-*.f6499.6
Applied rewrites99.6%
Applied rewrites99.7%
NOTE: a1 and a2 should be sorted in increasing order before calling this function.
(FPCore (a1 a2 th)
:precision binary64
(let* ((t_1 (/ (cos th) (sqrt 2.0))))
(if (<= (+ (* (* a1 a1) t_1) (* (* a2 a2) t_1)) -2e-102)
(* (* a2 a2) (/ (fma th (* th -0.5) 1.0) (sqrt 2.0)))
(fma a1 (/ a1 (sqrt 2.0)) (/ (* a2 a2) (sqrt 2.0))))))
assert(a1 < a2);
double code(double a1, double a2, double th) {
double t_1 = cos(th) / sqrt(2.0);
double tmp;
if ((((a1 * a1) * t_1) + ((a2 * a2) * t_1)) <= -2e-102) {
tmp = (a2 * a2) * (fma(th, (th * -0.5), 1.0) / sqrt(2.0));
} else {
tmp = fma(a1, (a1 / sqrt(2.0)), ((a2 * a2) / sqrt(2.0)));
}
return tmp;
}
# Two-regime approximation selected by the value of the original expression
# (a1 * a1) * t_1 + (a2 * a2) * t_1, where t_1 = cos(th) / sqrt(2).
#
# The report asks for the inputs to be in increasing order. Only a1 and a2 are
# interchangeable; th is the cosine argument and must not be reordered with
# them. The first branch ignores a1, so the ordering matters and is applied to
# a1 and a2 inside the function rather than by a top-level sort over all three.
function code(a1, a2, th)
    a1, a2 = minmax(a1, a2)
    t_1 = Float64(cos(th) / sqrt(2.0))
    tmp = 0.0
    if (Float64(Float64(Float64(a1 * a1) * t_1) + Float64(Float64(a2 * a2) * t_1)) <= -2e-102)
        tmp = Float64(Float64(a2 * a2) * Float64(fma(th, Float64(th * -0.5), 1.0) / sqrt(2.0)))
    else
        tmp = fma(a1, Float64(a1 / sqrt(2.0)), Float64(Float64(a2 * a2) / sqrt(2.0)))
    end
    return tmp
end
NOTE: a1 and a2 should be sorted in increasing order before calling this function.
(* code[a1, a2, th]: t$95$1 holds Cos[th] / Sqrt[2.0]. If (a1 * a1) * t$95$1 + (a2 * a2) * t$95$1 <= -2e-102, returns (a2 * a2) * ((th * (th * -0.5) + 1.0) / Sqrt[2.0]); otherwise returns a1 * (a1 / Sqrt[2.0]) + (a2 * a2) / Sqrt[2.0]. N[..., $MachinePrecision] is applied around each operation. *)
code[a1_, a2_, th_] := Block[{t$95$1 = N[(N[Cos[th], $MachinePrecision] / N[Sqrt[2.0], $MachinePrecision]), $MachinePrecision]}, If[LessEqual[N[(N[(N[(a1 * a1), $MachinePrecision] * t$95$1), $MachinePrecision] + N[(N[(a2 * a2), $MachinePrecision] * t$95$1), $MachinePrecision]), $MachinePrecision], -2e-102], N[(N[(a2 * a2), $MachinePrecision] * N[(N[(th * N[(th * -0.5), $MachinePrecision] + 1.0), $MachinePrecision] / N[Sqrt[2.0], $MachinePrecision]), $MachinePrecision]), $MachinePrecision], N[(a1 * N[(a1 / N[Sqrt[2.0], $MachinePrecision]), $MachinePrecision] + N[(N[(a2 * a2), $MachinePrecision] / N[Sqrt[2.0], $MachinePrecision]), $MachinePrecision]), $MachinePrecision]]]
\begin{array}{l}
[a1, a2] = \mathsf{sort}([a1, a2])\\
\\
\begin{array}{l}
t_1 := \frac{\cos th}{\sqrt{2}}\\
\mathbf{if}\;\left(a1 \cdot a1\right) \cdot t_1 + \left(a2 \cdot a2\right) \cdot t_1 \leq -2 \cdot 10^{-102}:\\
\;\;\;\;\left(a2 \cdot a2\right) \cdot \frac{\mathsf{fma}\left(th, th \cdot -0.5, 1\right)}{\sqrt{2}}\\
\mathbf{else}:\\
\;\;\;\;\mathsf{fma}\left(a1, \frac{a1}{\sqrt{2}}, \frac{a2 \cdot a2}{\sqrt{2}}\right)\\
\end{array}
\end{array}
if (+.f64 (*.f64 (/.f64 (cos.f64 th) (sqrt.f64 #s(literal 2 binary64))) (*.f64 a1 a1)) (*.f64 (/.f64 (cos.f64 th) (sqrt.f64 #s(literal 2 binary64))) (*.f64 a2 a2))) < -1.99999999999999987e-102
Initial program 99.5%
lift-+.f64N/A
lift-*.f64N/A
lift-*.f64N/A
distribute-lft-outN/A
*-commutativeN/A
lower-*.f64N/A
lift-*.f64N/A
lower-fma.f6499.5
Applied rewrites99.5%
Taylor expanded in th around 0
+-commutativeN/A
*-commutativeN/A
unpow2N/A
associate-*l*N/A
lower-fma.f64N/A
lower-*.f6454.3
Applied rewrites54.3%
Taylor expanded in a1 around 0
unpow2N/A
lower-*.f6441.5
Applied rewrites41.5%
if -1.99999999999999987e-102 < (+.f64 (*.f64 (/.f64 (cos.f64 th) (sqrt.f64 #s(literal 2 binary64))) (*.f64 a1 a1)) (*.f64 (/.f64 (cos.f64 th) (sqrt.f64 #s(literal 2 binary64))) (*.f64 a2 a2))) Initial program 99.5%
Taylor expanded in th around 0
unpow2N/A
associate-/l*N/A
lower-fma.f64N/A
lower-/.f64N/A
lower-sqrt.f64N/A
lower-/.f64N/A
unpow2N/A
lower-*.f64N/A
lower-sqrt.f6483.9
Applied rewrites83.9%
Final simplification75.3%
herbie shell --seed 2024225
; Benchmark input: (cos(th) / sqrt(2)) * (a1 * a1) + (cos(th) / sqrt(2)) * (a2 * a2), evaluated in binary64.
(FPCore (a1 a2 th)
:name "Migdal et al, Equation (64)"
:precision binary64
(+ (* (/ (cos th) (sqrt 2.0)) (* a1 a1)) (* (/ (cos th) (sqrt 2.0)) (* a2 a2))))