
; Computes (cos th / sqrt 2) * a1^2 + (cos th / sqrt 2) * a2^2 in binary64.
; The shared factor cos(th)/sqrt(2) is bound once as t_1 and reused by both terms.
(FPCore (a1 a2 th) :precision binary64 (let* ((t_1 (/ (cos th) (sqrt 2.0)))) (+ (* t_1 (* a1 a1)) (* t_1 (* a2 a2)))))
double code(double a1, double a2, double th) {
double t_1 = cos(th) / sqrt(2.0);
return (t_1 * (a1 * a1)) + (t_1 * (a2 * a2));
}
real(8) function code(a1, a2, th)
    ! Returns (cos(th)/sqrt(2)) * a1**2 + (cos(th)/sqrt(2)) * a2**2,
    ! evaluated in double precision with the shared factor computed once.
    real(8), intent (in) :: a1, a2, th
    real(8) :: scale

    scale = cos(th) / sqrt(2.0d0)
    code = (scale * (a1 * a1)) + (scale * (a2 * a2))
end function
public static double code(double a1, double a2, double th) {
    // Shared factor cos(th)/sqrt(2), applied to each squared amplitude.
    final double scale = Math.cos(th) / Math.sqrt(2.0);
    final double firstTerm = scale * (a1 * a1);
    final double secondTerm = scale * (a2 * a2);

    // Same operation order as the reference: scale each square, then add.
    return firstTerm + secondTerm;
}
def code(a1, a2, th):
    """Return (cos(th)/sqrt(2)) * a1**2 + (cos(th)/sqrt(2)) * a2**2.

    The shared factor cos(th)/sqrt(2) is computed once and applied to each
    squared amplitude separately before the two terms are added.
    """
    t_1 = math.cos(th) / math.sqrt(2.0)
    return (t_1 * (a1 * a1)) + (t_1 * (a2 * a2))
function code(a1, a2, th)
    # Shared factor cos(th)/sqrt(2); each intermediate is wrapped in Float64(...)
    # exactly as in the generated expression.
    t_1 = Float64(cos(th) / sqrt(2.0))
    return Float64(Float64(t_1 * Float64(a1 * a1)) + Float64(t_1 * Float64(a2 * a2)))
end
function tmp = code(a1, a2, th)
    % Returns (cos(th)/sqrt(2)) * a1^2 + (cos(th)/sqrt(2)) * a2^2.
    % The shared factor is computed once and applied to each squared input.
    t_1 = cos(th) / sqrt(2.0);
    tmp = (t_1 * (a1 * a1)) + (t_1 * (a2 * a2));
end
(* Evaluates (Cos[th]/Sqrt[2]) * a1^2 + (Cos[th]/Sqrt[2]) * a2^2. Every intermediate result is wrapped in N[..., $MachinePrecision]; the local t$95$1 holds the shared factor Cos[th]/Sqrt[2.0]. *)
code[a1_, a2_, th_] := Block[{t$95$1 = N[(N[Cos[th], $MachinePrecision] / N[Sqrt[2.0], $MachinePrecision]), $MachinePrecision]}, N[(N[(t$95$1 * N[(a1 * a1), $MachinePrecision]), $MachinePrecision] + N[(t$95$1 * N[(a2 * a2), $MachinePrecision]), $MachinePrecision]), $MachinePrecision]]
\begin{array}{l}
\\
\begin{array}{l}
t\_1 := \frac{\cos th}{\sqrt{2}}\\
t\_1 \cdot \left(a1 \cdot a1\right) + t\_1 \cdot \left(a2 \cdot a2\right)
\end{array}
\end{array}
Sampling outcomes in binary64 precision:
Herbie found 10 alternatives:
| Alternative | Accuracy | Speedup |
|---|---|---|
; Computes (cos th / sqrt 2) * a1^2 + (cos th / sqrt 2) * a2^2 in binary64.
; The shared factor cos(th)/sqrt(2) is bound once as t_1 and reused by both terms.
(FPCore (a1 a2 th) :precision binary64 (let* ((t_1 (/ (cos th) (sqrt 2.0)))) (+ (* t_1 (* a1 a1)) (* t_1 (* a2 a2)))))
double code(double a1, double a2, double th) {
double t_1 = cos(th) / sqrt(2.0);
return (t_1 * (a1 * a1)) + (t_1 * (a2 * a2));
}
real(8) function code(a1, a2, th)
    ! Double-precision evaluation of
    ! (cos(th)/sqrt(2)) * a1**2 + (cos(th)/sqrt(2)) * a2**2.
    real(8), intent (in) :: a1, a2, th
    real(8) :: factor

    factor = cos(th) / sqrt(2.0d0)
    code = (factor * (a1 * a1)) + (factor * (a2 * a2))
end function
public static double code(double a1, double a2, double th) {
    // cos(th)/sqrt(2) multiplies both squared inputs; compute it once.
    final double factor = Math.cos(th) / Math.sqrt(2.0);
    final double a1Part = factor * (a1 * a1);
    final double a2Part = factor * (a2 * a2);

    return a1Part + a2Part;
}
def code(a1, a2, th):
    """Return (cos(th)/sqrt(2)) * a1**2 + (cos(th)/sqrt(2)) * a2**2.

    The shared factor cos(th)/sqrt(2) is computed once and applied to each
    squared amplitude separately before the two terms are added.
    """
    t_1 = math.cos(th) / math.sqrt(2.0)
    return (t_1 * (a1 * a1)) + (t_1 * (a2 * a2))
function code(a1, a2, th)
    # Shared factor cos(th)/sqrt(2); each intermediate is wrapped in Float64(...)
    # exactly as in the generated expression.
    t_1 = Float64(cos(th) / sqrt(2.0))
    return Float64(Float64(t_1 * Float64(a1 * a1)) + Float64(t_1 * Float64(a2 * a2)))
end
function tmp = code(a1, a2, th)
    % Returns (cos(th)/sqrt(2)) * a1^2 + (cos(th)/sqrt(2)) * a2^2.
    % The shared factor is computed once and applied to each squared input.
    t_1 = cos(th) / sqrt(2.0);
    tmp = (t_1 * (a1 * a1)) + (t_1 * (a2 * a2));
end
(* Evaluates (Cos[th]/Sqrt[2]) * a1^2 + (Cos[th]/Sqrt[2]) * a2^2. Every intermediate result is wrapped in N[..., $MachinePrecision]; the local t$95$1 holds the shared factor Cos[th]/Sqrt[2.0]. *)
code[a1_, a2_, th_] := Block[{t$95$1 = N[(N[Cos[th], $MachinePrecision] / N[Sqrt[2.0], $MachinePrecision]), $MachinePrecision]}, N[(N[(t$95$1 * N[(a1 * a1), $MachinePrecision]), $MachinePrecision] + N[(t$95$1 * N[(a2 * a2), $MachinePrecision]), $MachinePrecision]), $MachinePrecision]]
\begin{array}{l}
\\
\begin{array}{l}
t\_1 := \frac{\cos th}{\sqrt{2}}\\
t\_1 \cdot \left(a1 \cdot a1\right) + t\_1 \cdot \left(a2 \cdot a2\right)
\end{array}
\end{array}
a2_m = (fabs.f64 a2)
a1_m = (fabs.f64 a1)
NOTE: a1_m, a2_m, and th should be sorted in increasing order before calling this function.
(FPCore (a1_m a2_m th)
  :precision binary64
  ; Single fused multiply-add: (cos th * a2_m/sqrt 2) * a2_m + cos th * (a1_m * a1_m/sqrt 2).
  (fma (* (cos th) (/ a2_m (sqrt 2.0))) a2_m (* (cos th) (* a1_m (/ a1_m (sqrt 2.0))))))
/* Caller-side preprocessing shown with the alternative: the function below takes magnitudes. */
a2_m = fabs(a2);
a1_m = fabs(a1);
/* Ordering precondition on the arguments passed to code().
 * NOTE(review): the original expression is symmetric in a1 and a2 but not in th;
 * confirm whether th really belongs in this ordering constraint. */
assert(a1_m < a2_m && a2_m < th);
double code(double a1_m, double a2_m, double th) {
return fma((cos(th) * (a2_m / sqrt(2.0))), a2_m, (cos(th) * (a1_m * (a1_m / sqrt(2.0)))));
}
a2_m = abs(a2)
a1_m = abs(a1)
a1_m, a2_m, th = sort([a1_m, a2_m, th])
function code(a1_m, a2_m, th)
    # Single fused multiply-add: (cos(th) * a2_m/sqrt(2)) * a2_m + cos(th) * (a1_m * a1_m/sqrt(2)).
    return fma(Float64(cos(th) * Float64(a2_m / sqrt(2.0))), a2_m, Float64(cos(th) * Float64(a1_m * Float64(a1_m / sqrt(2.0)))))
end
a2_m = N[Abs[a2], $MachinePrecision]
a1_m = N[Abs[a1], $MachinePrecision]
NOTE: a1_m, a2_m, and th should be sorted in increasing order before calling this function.
(* Evaluates (Cos[th] * a2m/Sqrt[2]) * a2m + Cos[th] * (a1m * a1m/Sqrt[2]), with every intermediate wrapped in N[..., $MachinePrecision]. *)
code[a1$95$m_, a2$95$m_, th_] := N[(N[(N[Cos[th], $MachinePrecision] * N[(a2$95$m / N[Sqrt[2.0], $MachinePrecision]), $MachinePrecision]), $MachinePrecision] * a2$95$m + N[(N[Cos[th], $MachinePrecision] * N[(a1$95$m * N[(a1$95$m / N[Sqrt[2.0], $MachinePrecision]), $MachinePrecision]), $MachinePrecision]), $MachinePrecision]), $MachinePrecision]
\begin{array}{l}
a2\_m = \left|a2\right|
\\
a1\_m = \left|a1\right|
\\
[a1\_m, a2\_m, th] = \mathsf{sort}([a1\_m, a2\_m, th])\\
\\
\mathsf{fma}\left(\cos th \cdot \frac{a2\_m}{\sqrt{2}}, a2\_m, \cos th \cdot \left(a1\_m \cdot \frac{a1\_m}{\sqrt{2}}\right)\right)
\end{array}
Initial program 99.5%
lift-+.f64: N/A
+-commutative: N/A
lift-*.f64: N/A
lift-*.f64: N/A
associate-*r*: N/A
lower-fma.f64: N/A
lift-/.f64: N/A
div-inv: N/A
associate-*l*: N/A
lower-*.f64: N/A
associate-*l/: N/A
*-lft-identity: N/A
lower-/.f64: 99.7%
lift-*.f64: N/A
lift-/.f64: N/A
associate-*l/: N/A
associate-/l*: N/A
lower-*.f64: N/A
Applied rewrites: 99.7%
a2_m = (fabs.f64 a2)
a1_m = (fabs.f64 a1)
NOTE: a1_m, a2_m, and th should be sorted in increasing order before calling this function.
(FPCore (a1_m a2_m th)
  :precision binary64
  ; t_1 is the shared factor cos(th)/sqrt(2) used only to evaluate the regime test.
  (let* ((t_1 (/ (cos th) (sqrt 2.0))))
    ; Regime test: value of the original expression compared against -2e-137.
    (if (<= (+ (* t_1 (* a1_m a1_m)) (* (* a2_m a2_m) t_1)) -2e-137)
      ; At or below the threshold: a2_m-only form with cos(th) replaced by 1 - th^2/2.
      (* a2_m (/ (* a2_m (fma (* th th) -0.5 1.0)) (sqrt 2.0)))
      ; Otherwise: th-independent form a1_m^2/sqrt(2) + a2_m^2/sqrt(2).
      (fma a1_m (/ a1_m (sqrt 2.0)) (/ a2_m (/ (sqrt 2.0) a2_m))))))
a2_m = fabs(a2);
/* Caller-side preprocessing shown with the alternative: the function below takes magnitudes. */
a1_m = fabs(a1);
/* Ordering precondition on the arguments passed to code().
 * NOTE(review): the original expression is symmetric in a1 and a2 but not in th;
 * confirm whether th really belongs in this ordering constraint. */
assert(a1_m < a2_m && a2_m < th);
double code(double a1_m, double a2_m, double th) {
double t_1 = cos(th) / sqrt(2.0);
double tmp;
if (((t_1 * (a1_m * a1_m)) + ((a2_m * a2_m) * t_1)) <= -2e-137) {
tmp = a2_m * ((a2_m * fma((th * th), -0.5, 1.0)) / sqrt(2.0));
} else {
tmp = fma(a1_m, (a1_m / sqrt(2.0)), (a2_m / (sqrt(2.0) / a2_m)));
}
return tmp;
}
a2_m = abs(a2)
a1_m = abs(a1)
a1_m, a2_m, th = sort([a1_m, a2_m, th])
function code(a1_m, a2_m, th)
    # t_1 is the shared factor cos(th)/sqrt(2), used only for the regime test.
    t_1 = Float64(cos(th) / sqrt(2.0))
    tmp = 0.0
    if (Float64(Float64(t_1 * Float64(a1_m * a1_m)) + Float64(Float64(a2_m * a2_m) * t_1)) <= -2e-137)
        # a2_m-only form with cos(th) replaced by fma(th*th, -0.5, 1.0).
        tmp = Float64(a2_m * Float64(Float64(a2_m * fma(Float64(th * th), -0.5, 1.0)) / sqrt(2.0)))
    else
        # th-independent form: a1_m * (a1_m/sqrt(2)) + a2_m / (sqrt(2)/a2_m).
        tmp = fma(a1_m, Float64(a1_m / sqrt(2.0)), Float64(a2_m / Float64(sqrt(2.0) / a2_m)))
    end
    return tmp
end
a2_m = N[Abs[a2], $MachinePrecision]
a1_m = N[Abs[a1], $MachinePrecision]
NOTE: a1_m, a2_m, and th should be sorted in increasing order before calling this function.
(* Two-regime evaluation. t$95$1 holds Cos[th]/Sqrt[2.0] and is used to form the original expression for the test against -2e-137. When the test holds, the result is a2m * (a2m * (th*th*-0.5 + 1.0)) / Sqrt[2]; otherwise it is a1m * (a1m/Sqrt[2]) + a2m / (Sqrt[2]/a2m). Every intermediate is wrapped in N[..., $MachinePrecision]. *)
code[a1$95$m_, a2$95$m_, th_] := Block[{t$95$1 = N[(N[Cos[th], $MachinePrecision] / N[Sqrt[2.0], $MachinePrecision]), $MachinePrecision]}, If[LessEqual[N[(N[(t$95$1 * N[(a1$95$m * a1$95$m), $MachinePrecision]), $MachinePrecision] + N[(N[(a2$95$m * a2$95$m), $MachinePrecision] * t$95$1), $MachinePrecision]), $MachinePrecision], -2e-137], N[(a2$95$m * N[(N[(a2$95$m * N[(N[(th * th), $MachinePrecision] * -0.5 + 1.0), $MachinePrecision]), $MachinePrecision] / N[Sqrt[2.0], $MachinePrecision]), $MachinePrecision]), $MachinePrecision], N[(a1$95$m * N[(a1$95$m / N[Sqrt[2.0], $MachinePrecision]), $MachinePrecision] + N[(a2$95$m / N[(N[Sqrt[2.0], $MachinePrecision] / a2$95$m), $MachinePrecision]), $MachinePrecision]), $MachinePrecision]]]
\begin{array}{l}
a2\_m = \left|a2\right|
\\
a1\_m = \left|a1\right|
\\
[a1\_m, a2\_m, th] = \mathsf{sort}([a1\_m, a2\_m, th])\\
\\
\begin{array}{l}
t\_1 := \frac{\cos th}{\sqrt{2}}\\
\mathbf{if}\;t\_1 \cdot \left(a1\_m \cdot a1\_m\right) + \left(a2\_m \cdot a2\_m\right) \cdot t\_1 \leq -2 \cdot 10^{-137}:\\
\;\;\;\;a2\_m \cdot \frac{a2\_m \cdot \mathsf{fma}\left(th \cdot th, -0.5, 1\right)}{\sqrt{2}}\\
\mathbf{else}:\\
\;\;\;\;\mathsf{fma}\left(a1\_m, \frac{a1\_m}{\sqrt{2}}, \frac{a2\_m}{\frac{\sqrt{2}}{a2\_m}}\right)\\
\end{array}
\end{array}
if (+.f64 (*.f64 (/.f64 (cos.f64 th) (sqrt.f64 #s(literal 2 binary64))) (*.f64 a1 a1)) (*.f64 (/.f64 (cos.f64 th) (sqrt.f64 #s(literal 2 binary64))) (*.f64 a2 a2))) < -1.99999999999999996e-137
Initial program 99.5%
Taylor expanded in a1 around 0
lower-/.f64: N/A
lower-*.f64: N/A
unpow2: N/A
lower-*.f64: N/A
lower-cos.f64: N/A
lower-sqrt.f64: 99.2%
Applied rewrites: 99.2%
Taylor expanded in th around 0
Applied rewrites: 53.1%
Applied rewrites: 53.1%
if -1.99999999999999996e-137 < (+.f64 (*.f64 (/.f64 (cos.f64 th) (sqrt.f64 #s(literal 2 binary64))) (*.f64 a1 a1)) (*.f64 (/.f64 (cos.f64 th) (sqrt.f64 #s(literal 2 binary64))) (*.f64 a2 a2)))
Initial program 99.5%
Taylor expanded in th around 0
unpow2: N/A
associate-/l*: N/A
lower-fma.f64: N/A
lower-/.f64: N/A
lower-sqrt.f64: N/A
lower-/.f64: N/A
unpow2: N/A
lower-*.f64: N/A
lower-sqrt.f64: 84.7%
Applied rewrites: 84.7%
Applied rewrites: 84.7%
Final simplification: 78.0%
herbie shell --seed 2024227
; Input expression: (cos th / sqrt 2) * a1^2 + (cos th / sqrt 2) * a2^2, evaluated in binary64.
(FPCore (a1 a2 th)
:name "Migdal et al, Equation (64)"
:precision binary64
(+ (* (/ (cos th) (sqrt 2.0)) (* a1 a1)) (* (/ (cos th) (sqrt 2.0)) (* a2 a2))))