
; Evaluates cos(th)/sqrt(2) * a1^2 + cos(th)/sqrt(2) * a2^2 in binary64.
; t_1 names the factor cos(th)/sqrt(2) that both products share.
(FPCore (a1 a2 th) :precision binary64 (let* ((t_1 (/ (cos th) (sqrt 2.0)))) (+ (* t_1 (* a1 a1)) (* t_1 (* a2 a2)))))
double code(double a1, double a2, double th) {
double t_1 = cos(th) / sqrt(2.0);
return (t_1 * (a1 * a1)) + (t_1 * (a2 * a2));
}
! Returns cos(th)/sqrt(2) * a1**2 + cos(th)/sqrt(2) * a2**2 as real(8).
!   a1, a2 : values that are squared before being scaled
!   th     : argument of the cosine in the shared scale factor
real(8) function code(a1, a2, th)
    real(8), intent(in) :: a1, a2, th
    real(8) :: scale
    ! Factor common to both terms.
    scale = cos(th) / sqrt(2.0d0)
    code = (scale * (a1 * a1)) + (scale * (a2 * a2))
end function code
/**
 * Returns cos(th)/sqrt(2) * a1^2 + cos(th)/sqrt(2) * a2^2.
 *
 * @param a1 first value; squared before being scaled
 * @param a2 second value; squared before being scaled
 * @param th argument of the cosine in the shared scale factor
 * @return the sum of the two scaled squares
 */
public static double code(double a1, double a2, double th) {
    // Factor common to both terms.
    final double scale = Math.cos(th) / Math.sqrt(2.0);
    final double a1Squared = a1 * a1;
    final double a2Squared = a2 * a2;
    return (scale * a1Squared) + (scale * a2Squared);
}
def code(a1, a2, th):
    """Return cos(th)/sqrt(2) * a1**2 + cos(th)/sqrt(2) * a2**2.

    Args:
        a1: First value; squared before being scaled.
        a2: Second value; squared before being scaled.
        th: Argument of the cosine in the shared scale factor.

    Returns:
        The sum of the two scaled squares as a float.
    """
    # Factor common to both terms.
    t_1 = math.cos(th) / math.sqrt(2.0)
    return (t_1 * (a1 * a1)) + (t_1 * (a2 * a2))
# Returns cos(th)/sqrt(2) * a1^2 + cos(th)/sqrt(2) * a2^2.
# Each intermediate result is passed through Float64(...).
function code(a1, a2, th)
    # Factor common to both terms.
    t_1 = Float64(cos(th) / sqrt(2.0))
    return Float64(Float64(t_1 * Float64(a1 * a1)) + Float64(t_1 * Float64(a2 * a2)))
end
function tmp = code(a1, a2, th)
    % CODE  Returns cos(th)/sqrt(2) * a1^2 + cos(th)/sqrt(2) * a2^2.
    %   a1, a2 - values that are squared before being scaled
    %   th     - argument of the cosine in the shared scale factor

    % Factor common to both terms.
    t_1 = cos(th) / sqrt(2.0);
    tmp = (t_1 * (a1 * a1)) + (t_1 * (a2 * a2));
end
(* Evaluates Cos[th]/Sqrt[2.0] * (a1*a1) + Cos[th]/Sqrt[2.0] * (a2*a2), wrapping every intermediate result in N[..., $MachinePrecision]. The Block-local t$95$1 holds the shared factor Cos[th]/Sqrt[2.0]; the name is presumably an encoding of t_1 (ASCII 95 is the underscore) - confirm against the generator. *)
code[a1_, a2_, th_] := Block[{t$95$1 = N[(N[Cos[th], $MachinePrecision] / N[Sqrt[2.0], $MachinePrecision]), $MachinePrecision]}, N[(N[(t$95$1 * N[(a1 * a1), $MachinePrecision]), $MachinePrecision] + N[(t$95$1 * N[(a2 * a2), $MachinePrecision]), $MachinePrecision]), $MachinePrecision]]
\begin{array}{l}
\\
\begin{array}{l}
t_1 := \frac{\cos th}{\sqrt{2}}\\
t\_1 \cdot \left(a1 \cdot a1\right) + t\_1 \cdot \left(a2 \cdot a2\right)
\end{array}
\end{array}
Sampling outcomes in binary64 precision:
Herbie found 13 alternatives:
| Alternative | Accuracy | Speedup |
|---|---|---|
; Evaluates cos(th)/sqrt(2) * a1^2 + cos(th)/sqrt(2) * a2^2 in binary64.
; t_1 names the factor cos(th)/sqrt(2) that both products share.
(FPCore (a1 a2 th) :precision binary64 (let* ((t_1 (/ (cos th) (sqrt 2.0)))) (+ (* t_1 (* a1 a1)) (* t_1 (* a2 a2)))))
double code(double a1, double a2, double th) {
double t_1 = cos(th) / sqrt(2.0);
return (t_1 * (a1 * a1)) + (t_1 * (a2 * a2));
}
! Returns cos(th)/sqrt(2) * a1**2 + cos(th)/sqrt(2) * a2**2 as real(8).
!   a1, a2 : values that are squared before being scaled
!   th     : argument of the cosine in the shared scale factor
real(8) function code(a1, a2, th)
    real(8), intent(in) :: a1, a2, th
    real(8) :: scale
    ! Factor common to both terms.
    scale = cos(th) / sqrt(2.0d0)
    code = (scale * (a1 * a1)) + (scale * (a2 * a2))
end function code
/**
 * Returns cos(th)/sqrt(2) * a1^2 + cos(th)/sqrt(2) * a2^2.
 *
 * @param a1 first value; squared before being scaled
 * @param a2 second value; squared before being scaled
 * @param th argument of the cosine in the shared scale factor
 * @return the sum of the two scaled squares
 */
public static double code(double a1, double a2, double th) {
    // Factor common to both terms.
    final double scale = Math.cos(th) / Math.sqrt(2.0);
    final double a1Squared = a1 * a1;
    final double a2Squared = a2 * a2;
    return (scale * a1Squared) + (scale * a2Squared);
}
def code(a1, a2, th):
    """Return cos(th)/sqrt(2) * a1**2 + cos(th)/sqrt(2) * a2**2.

    Args:
        a1: First value; squared before being scaled.
        a2: Second value; squared before being scaled.
        th: Argument of the cosine in the shared scale factor.

    Returns:
        The sum of the two scaled squares as a float.
    """
    # Factor common to both terms.
    t_1 = math.cos(th) / math.sqrt(2.0)
    return (t_1 * (a1 * a1)) + (t_1 * (a2 * a2))
# Returns cos(th)/sqrt(2) * a1^2 + cos(th)/sqrt(2) * a2^2.
# Each intermediate result is passed through Float64(...).
function code(a1, a2, th)
    # Factor common to both terms.
    t_1 = Float64(cos(th) / sqrt(2.0))
    return Float64(Float64(t_1 * Float64(a1 * a1)) + Float64(t_1 * Float64(a2 * a2)))
end
function tmp = code(a1, a2, th)
    % CODE  Returns cos(th)/sqrt(2) * a1^2 + cos(th)/sqrt(2) * a2^2.
    %   a1, a2 - values that are squared before being scaled
    %   th     - argument of the cosine in the shared scale factor

    % Factor common to both terms.
    t_1 = cos(th) / sqrt(2.0);
    tmp = (t_1 * (a1 * a1)) + (t_1 * (a2 * a2));
end
(* Evaluates Cos[th]/Sqrt[2.0] * (a1*a1) + Cos[th]/Sqrt[2.0] * (a2*a2), wrapping every intermediate result in N[..., $MachinePrecision]. The Block-local t$95$1 holds the shared factor Cos[th]/Sqrt[2.0]; the name is presumably an encoding of t_1 (ASCII 95 is the underscore) - confirm against the generator. *)
code[a1_, a2_, th_] := Block[{t$95$1 = N[(N[Cos[th], $MachinePrecision] / N[Sqrt[2.0], $MachinePrecision]), $MachinePrecision]}, N[(N[(t$95$1 * N[(a1 * a1), $MachinePrecision]), $MachinePrecision] + N[(t$95$1 * N[(a2 * a2), $MachinePrecision]), $MachinePrecision]), $MachinePrecision]]
\begin{array}{l}
\\
\begin{array}{l}
t_1 := \frac{\cos th}{\sqrt{2}}\\
t\_1 \cdot \left(a1 \cdot a1\right) + t\_1 \cdot \left(a2 \cdot a2\right)
\end{array}
\end{array}
a2_m = (fabs.f64 a2)
a1_m = (fabs.f64 a1)
NOTE: a1_m and a2_m should be sorted in increasing order before calling this function (th is not part of the sort: the expression is symmetric only in a1_m and a2_m).
(FPCore (a1_m a2_m th) :precision binary64 (* 0.5 (fma (* (* (cos th) a2_m) (sqrt 2.0)) a2_m (* (* a1_m (sqrt 2.0)) (* a1_m (cos th))))))
a2_m = fabs(a2);
a1_m = fabs(a1);
assert(a1_m < a2_m && a2_m < th);
double code(double a1_m, double a2_m, double th) {
return 0.5 * fma(((cos(th) * a2_m) * sqrt(2.0)), a2_m, ((a1_m * sqrt(2.0)) * (a1_m * cos(th))));
}
# Caller-side preprocessing: take magnitudes, then order the two amplitudes.
# th is not sorted with them; the expression is symmetric only in a1_m and
# a2_m, so swapping th with an amplitude would change the result.
a2_m = abs(a2)
a1_m = abs(a1)
a1_m, a2_m = sort([a1_m, a2_m])

# Returns 0.5 * fma((cos(th) * a2_m) * sqrt(2), a2_m, (a1_m * sqrt(2)) * (a1_m * cos(th))).
# Each intermediate product is passed through Float64(...).
function code(a1_m, a2_m, th)
    return Float64(0.5 * fma(Float64(Float64(cos(th) * a2_m) * sqrt(2.0)), a2_m, Float64(Float64(a1_m * sqrt(2.0)) * Float64(a1_m * cos(th)))))
end
a2_m = N[Abs[a2], $MachinePrecision]
a1_m = N[Abs[a1], $MachinePrecision]
NOTE: a1_m and a2_m should be sorted in increasing order before calling this function (th is not part of the sort: the expression is symmetric only in a1_m and a2_m).
code[a1$95$m_, a2$95$m_, th_] := N[(0.5 * N[(N[(N[(N[Cos[th], $MachinePrecision] * a2$95$m), $MachinePrecision] * N[Sqrt[2.0], $MachinePrecision]), $MachinePrecision] * a2$95$m + N[(N[(a1$95$m * N[Sqrt[2.0], $MachinePrecision]), $MachinePrecision] * N[(a1$95$m * N[Cos[th], $MachinePrecision]), $MachinePrecision]), $MachinePrecision]), $MachinePrecision]), $MachinePrecision]
\begin{array}{l}
a2_m = \left|a2\right|
\\
a1_m = \left|a1\right|
\\
[a1_m, a2_m] = \mathsf{sort}([a1_m, a2_m])\\
\\
0.5 \cdot \mathsf{fma}\left(\left(\cos th \cdot a2\_m\right) \cdot \sqrt{2}, a2\_m, \left(a1\_m \cdot \sqrt{2}\right) \cdot \left(a1\_m \cdot \cos th\right)\right)
\end{array}
Initial program 99.6%
lift-+.f64 N/A
lift-*.f64 N/A
lift-/.f64 N/A
associate-*l/ N/A
lift-*.f64 N/A
lift-/.f64 N/A
associate-*l/ N/A
frac-add N/A
lift-sqrt.f64 N/A
lift-sqrt.f64 N/A
rem-square-sqrt N/A
div-inv N/A
metadata-eval N/A
lower-*.f64 N/A
Applied rewrites 99.6%
Final simplification 99.6%
a2_m = (fabs.f64 a2)
a1_m = (fabs.f64 a1)
NOTE: a1_m and a2_m should be sorted in increasing order before calling this function (th is not part of the sort: the expression is symmetric only in a1_m and a2_m).
(FPCore (a1_m a2_m th)
:precision binary64
(let* ((t_1 (* (/ a2_m (sqrt 2.0)) a2_m)) (t_2 (/ (cos th) (sqrt 2.0))))
(if (<= (+ (* t_2 (* a2_m a2_m)) (* t_2 (* a1_m a1_m))) -2e-186)
(* t_1 (* (* th th) -0.5))
(fma (/ a1_m (sqrt 2.0)) a1_m t_1))))
a2_m = fabs(a2);
a1_m = fabs(a1);
assert(a1_m < a2_m && a2_m < th);
double code(double a1_m, double a2_m, double th) {
double t_1 = (a2_m / sqrt(2.0)) * a2_m;
double t_2 = cos(th) / sqrt(2.0);
double tmp;
if (((t_2 * (a2_m * a2_m)) + (t_2 * (a1_m * a1_m))) <= -2e-186) {
tmp = t_1 * ((th * th) * -0.5);
} else {
tmp = fma((a1_m / sqrt(2.0)), a1_m, t_1);
}
return tmp;
}
# Caller-side preprocessing: take magnitudes, then order the two amplitudes.
# th is not sorted with them; the expression is symmetric only in a1_m and
# a2_m, so swapping th with an amplitude would change the result.
a2_m = abs(a2)
a1_m = abs(a1)
a1_m, a2_m = sort([a1_m, a2_m])

# Piecewise evaluation selected by the value of
# cos(th)/sqrt(2) * a2_m^2 + cos(th)/sqrt(2) * a1_m^2:
#   <= -2e-186 : t_1 * ((th * th) * -0.5)
#   otherwise  : fma(a1_m / sqrt(2), a1_m, t_1)
# where t_1 = (a2_m / sqrt(2)) * a2_m.
function code(a1_m, a2_m, th)
    t_1 = Float64(Float64(a2_m / sqrt(2.0)) * a2_m)
    t_2 = Float64(cos(th) / sqrt(2.0))
    tmp = 0.0
    if (Float64(Float64(t_2 * Float64(a2_m * a2_m)) + Float64(t_2 * Float64(a1_m * a1_m))) <= -2e-186)
        tmp = Float64(t_1 * Float64(Float64(th * th) * -0.5));
    else
        tmp = fma(Float64(a1_m / sqrt(2.0)), a1_m, t_1);
    end
    return tmp
end
a2_m = N[Abs[a2], $MachinePrecision]
a1_m = N[Abs[a1], $MachinePrecision]
NOTE: a1_m and a2_m should be sorted in increasing order before calling this function (th is not part of the sort: the expression is symmetric only in a1_m and a2_m).
(* Piecewise evaluation. t$95$1 = (a2$95$m / Sqrt[2.0]) * a2$95$m and t$95$2 = Cos[th] / Sqrt[2.0]. When t$95$2 * a2$95$m^2 + t$95$2 * a1$95$m^2 is <= -2e-186 the result is t$95$1 * ((th * th) * -0.5); otherwise it is (a1$95$m / Sqrt[2.0]) * a1$95$m + t$95$1. Every intermediate result is wrapped in N[..., $MachinePrecision]. The $95$ in the names is presumably an encoding of the underscore (ASCII 95) - confirm against the generator. *)
code[a1$95$m_, a2$95$m_, th_] := Block[{t$95$1 = N[(N[(a2$95$m / N[Sqrt[2.0], $MachinePrecision]), $MachinePrecision] * a2$95$m), $MachinePrecision]}, Block[{t$95$2 = N[(N[Cos[th], $MachinePrecision] / N[Sqrt[2.0], $MachinePrecision]), $MachinePrecision]}, If[LessEqual[N[(N[(t$95$2 * N[(a2$95$m * a2$95$m), $MachinePrecision]), $MachinePrecision] + N[(t$95$2 * N[(a1$95$m * a1$95$m), $MachinePrecision]), $MachinePrecision]), $MachinePrecision], -2e-186], N[(t$95$1 * N[(N[(th * th), $MachinePrecision] * -0.5), $MachinePrecision]), $MachinePrecision], N[(N[(a1$95$m / N[Sqrt[2.0], $MachinePrecision]), $MachinePrecision] * a1$95$m + t$95$1), $MachinePrecision]]]]
\begin{array}{l}
a2_m = \left|a2\right|
\\
a1_m = \left|a1\right|
\\
[a1_m, a2_m] = \mathsf{sort}([a1_m, a2_m])\\
\\
\begin{array}{l}
t_1 := \frac{a2\_m}{\sqrt{2}} \cdot a2\_m\\
t_2 := \frac{\cos th}{\sqrt{2}}\\
\mathbf{if}\;t\_2 \cdot \left(a2\_m \cdot a2\_m\right) + t\_2 \cdot \left(a1\_m \cdot a1\_m\right) \leq -2 \cdot 10^{-186}:\\
\;\;\;\;t\_1 \cdot \left(\left(th \cdot th\right) \cdot -0.5\right)\\
\mathbf{else}:\\
\;\;\;\;\mathsf{fma}\left(\frac{a1\_m}{\sqrt{2}}, a1\_m, t\_1\right)\\
\end{array}
\end{array}
if (+.f64 (*.f64 (/.f64 (cos.f64 th) (sqrt.f64 #s(literal 2 binary64))) (*.f64 a1 a1)) (*.f64 (/.f64 (cos.f64 th) (sqrt.f64 #s(literal 2 binary64))) (*.f64 a2 a2))) < -1.9999999999999998e-186
Initial program 99.5%
Taylor expanded in a1 around 0
*-commutative N/A
unpow2 N/A
associate-*r* N/A
associate-/l* N/A
lower-*.f64 N/A
lower-*.f64 N/A
lower-cos.f64 N/A
lower-/.f64 N/A
lower-sqrt.f64 99.0
Applied rewrites 99.0%
Taylor expanded in th around 0
Applied rewrites 52.0%
Taylor expanded in th around inf
Applied rewrites 52.0%
if -1.9999999999999998e-186 < (+.f64 (*.f64 (/.f64 (cos.f64 th) (sqrt.f64 #s(literal 2 binary64))) (*.f64 a1 a1)) (*.f64 (/.f64 (cos.f64 th) (sqrt.f64 #s(literal 2 binary64))) (*.f64 a2 a2))) Initial program 99.6%
Taylor expanded in th around 0
unpow2 N/A
associate-*l/ N/A
lower-fma.f64 N/A
lower-/.f64 N/A
lower-sqrt.f64 N/A
unpow2 N/A
associate-*l/ N/A
lower-*.f64 N/A
lower-/.f64 N/A
lower-sqrt.f64 85.3
Applied rewrites 85.3%
Final simplification 78.1%
herbie shell --seed 2024234
; Input expression: cos(th)/sqrt(2) * a1^2 + cos(th)/sqrt(2) * a2^2,
; evaluated in binary64. The factor cos(th)/sqrt(2) is written out in
; both products rather than bound to a shared name.
(FPCore (a1 a2 th)
:name "Migdal et al, Equation (64)"
:precision binary64
(+ (* (/ (cos th) (sqrt 2.0)) (* a1 a1)) (* (/ (cos th) (sqrt 2.0)) (* a2 a2))))