Toniolo and Linder, Equation (3a)

Percentage Accurate: 98.4% → 99.5%
Time: 11.6s
Alternatives: 5
Speedup: 2.3×

Specification

?
\[\begin{array}{l} \\ \sqrt{\frac{1}{2} \cdot \left(1 + \frac{1}{\sqrt{1 + {\left(\frac{2 \cdot \ell}{Om}\right)}^{2} \cdot \left({\sin kx}^{2} + {\sin ky}^{2}\right)}}\right)} \end{array} \]
; Toniolo and Linder, Equation (3a), evaluated in binary64:
;   sqrt(1/2 * (1 + 1 / sqrt(1 + (2*l/Om)^2 * (sin(kx)^2 + sin(ky)^2))))
(FPCore (l Om kx ky)
 :precision binary64
 (sqrt
  (*
   (/ 1.0 2.0)
   (+
    1.0
    (/
     1.0
     (sqrt
      (+
       1.0
       (*
        (pow (/ (* 2.0 l) Om) 2.0)
        (+ (pow (sin kx) 2.0) (pow (sin ky) 2.0))))))))))
double code(double l, double Om, double kx, double ky) {
	return sqrt(((1.0 / 2.0) * (1.0 + (1.0 / sqrt((1.0 + (pow(((2.0 * l) / Om), 2.0) * (pow(sin(kx), 2.0) + pow(sin(ky), 2.0)))))))));
}
! Evaluates sqrt(1/2 * (1 + 1/sqrt(1 + (2*l/om)**2 * (sin(kx)**2 + sin(ky)**2)))).
real(8) function code(l, om, kx, ky)
    real(8), intent (in) :: l, om, kx, ky
    real(8) :: ratio_sq, sin_sq_sum, inv_root
    ratio_sq = ((2.0d0 * l) / om) ** 2.0d0
    sin_sq_sum = (sin(kx) ** 2.0d0) + (sin(ky) ** 2.0d0)
    ! Reciprocal of the inner square root.
    inv_root = 1.0d0 / sqrt(1.0d0 + (ratio_sq * sin_sq_sum))
    code = sqrt((1.0d0 / 2.0d0) * (1.0d0 + inv_root))
end function
/** Evaluates sqrt(1/2 * (1 + 1/sqrt(1 + (2*l/Om)^2 * (sin(kx)^2 + sin(ky)^2)))). */
public static double code(double l, double Om, double kx, double ky) {
	final double ratioSq = Math.pow((2.0 * l) / Om, 2.0);
	final double sinSqSum = Math.pow(Math.sin(kx), 2.0) + Math.pow(Math.sin(ky), 2.0);
	// Reciprocal of the inner square root.
	final double invRoot = 1.0 / Math.sqrt(1.0 + (ratioSq * sinSqSum));
	return Math.sqrt((1.0 / 2.0) * (1.0 + invRoot));
}
def code(l, Om, kx, ky):
	"""Evaluate sqrt(1/2 * (1 + 1/sqrt(1 + (2*l/Om)**2 * (sin(kx)**2 + sin(ky)**2))))."""
	# The ratio term is computed first, matching the original operand order.
	ratio_sq = math.pow((2.0 * l) / Om, 2.0)
	sin_sq_sum = math.pow(math.sin(kx), 2.0) + math.pow(math.sin(ky), 2.0)
	inv_root = 1.0 / math.sqrt(1.0 + (ratio_sq * sin_sq_sum))
	return math.sqrt((1.0 / 2.0) * (1.0 + inv_root))
# Evaluates sqrt(1/2 * (1 + 1/sqrt(1 + (2*l/Om)^2 * (sin(kx)^2 + sin(ky)^2)))),
# converting each intermediate result to Float64.
function code(l, Om, kx, ky)
	ratio_sq = Float64(Float64(2.0 * l) / Om) ^ 2.0
	sin_sq_sum = Float64((sin(kx) ^ 2.0) + (sin(ky) ^ 2.0))
	radicand = Float64(1.0 + Float64(ratio_sq * sin_sq_sum))
	inv_root = Float64(1.0 / sqrt(radicand))
	return sqrt(Float64(Float64(1.0 / 2.0) * Float64(1.0 + inv_root)))
end
function tmp = code(l, Om, kx, ky)
	% Evaluates sqrt(1/2 * (1 + 1/sqrt(1 + (2*l/Om)^2 * (sin(kx)^2 + sin(ky)^2)))).
	ratio_sq = ((2.0 * l) / Om) ^ 2.0;
	sin_sq_sum = (sin(kx) ^ 2.0) + (sin(ky) ^ 2.0);
	% Reciprocal of the inner square root.
	inv_root = 1.0 / sqrt(1.0 + (ratio_sq * sin_sq_sum));
	tmp = sqrt((1.0 / 2.0) * (1.0 + inv_root));
end
(* Sqrt[1/2 (1 + 1/Sqrt[1 + (2 l/Om)^2 (Sin[kx]^2 + Sin[ky]^2)]]); every intermediate result is wrapped in N[..., $MachinePrecision]. *)
code[l_, Om_, kx_, ky_] := N[Sqrt[N[(N[(1.0 / 2.0), $MachinePrecision] * N[(1.0 + N[(1.0 / N[Sqrt[N[(1.0 + N[(N[Power[N[(N[(2.0 * l), $MachinePrecision] / Om), $MachinePrecision], 2.0], $MachinePrecision] * N[(N[Power[N[Sin[kx], $MachinePrecision], 2.0], $MachinePrecision] + N[Power[N[Sin[ky], $MachinePrecision], 2.0], $MachinePrecision]), $MachinePrecision]), $MachinePrecision]), $MachinePrecision]], $MachinePrecision]), $MachinePrecision]), $MachinePrecision]), $MachinePrecision]], $MachinePrecision]
\begin{array}{l}

\\
\sqrt{\frac{1}{2} \cdot \left(1 + \frac{1}{\sqrt{1 + {\left(\frac{2 \cdot \ell}{Om}\right)}^{2} \cdot \left({\sin kx}^{2} + {\sin ky}^{2}\right)}}\right)}
\end{array}

Sampling outcomes in binary64 precision:

Local Percentage Accuracy vs ?

The average percentage accuracy by input value. The horizontal axis shows the value of an input variable; the variable is chosen in the title. The vertical axis is accuracy; higher is better. Red represents the original program, while blue represents Herbie's suggestion. These can be toggled with the buttons below the plot. The line is an average, while dots represent individual samples.

Accuracy vs Speed?

Herbie found 5 alternatives:

Alternative | Accuracy | Speedup
The accuracy (vertical axis) and speed (horizontal axis) of each alternative. Up and to the right is better. The red square shows the initial program, and each blue circle shows an alternative. The line shows the best available speed-accuracy tradeoffs.

Initial Program: 98.4% accurate, 1.0× speedup?

\[\begin{array}{l} \\ \sqrt{\frac{1}{2} \cdot \left(1 + \frac{1}{\sqrt{1 + {\left(\frac{2 \cdot \ell}{Om}\right)}^{2} \cdot \left({\sin kx}^{2} + {\sin ky}^{2}\right)}}\right)} \end{array} \]
; Initial program, evaluated in binary64:
;   sqrt(1/2 * (1 + 1 / sqrt(1 + (2*l/Om)^2 * (sin(kx)^2 + sin(ky)^2))))
(FPCore (l Om kx ky)
 :precision binary64
 (sqrt
  (*
   (/ 1.0 2.0)
   (+
    1.0
    (/
     1.0
     (sqrt
      (+
       1.0
       (*
        (pow (/ (* 2.0 l) Om) 2.0)
        (+ (pow (sin kx) 2.0) (pow (sin ky) 2.0))))))))))
double code(double l, double Om, double kx, double ky) {
	return sqrt(((1.0 / 2.0) * (1.0 + (1.0 / sqrt((1.0 + (pow(((2.0 * l) / Om), 2.0) * (pow(sin(kx), 2.0) + pow(sin(ky), 2.0)))))))));
}
! Initial program: sqrt(1/2 * (1 + 1/sqrt(1 + (2*l/om)**2 * (sin(kx)**2 + sin(ky)**2)))).
real(8) function code(l, om, kx, ky)
    real(8), intent (in) :: l, om, kx, ky
    real(8) :: ratio_sq, sin_sq_sum, radicand
    ratio_sq = ((2.0d0 * l) / om) ** 2.0d0
    sin_sq_sum = (sin(kx) ** 2.0d0) + (sin(ky) ** 2.0d0)
    radicand = 1.0d0 + (ratio_sq * sin_sq_sum)
    code = sqrt((1.0d0 / 2.0d0) * (1.0d0 + (1.0d0 / sqrt(radicand))))
end function
/** Initial program: sqrt(1/2 * (1 + 1/sqrt(1 + (2*l/Om)^2 * (sin(kx)^2 + sin(ky)^2)))). */
public static double code(double l, double Om, double kx, double ky) {
	final double ratioSq = Math.pow((2.0 * l) / Om, 2.0);
	final double sinSqSum = Math.pow(Math.sin(kx), 2.0) + Math.pow(Math.sin(ky), 2.0);
	final double radicand = 1.0 + (ratioSq * sinSqSum);
	final double onePlusInv = 1.0 + (1.0 / Math.sqrt(radicand));
	return Math.sqrt((1.0 / 2.0) * onePlusInv);
}
def code(l, Om, kx, ky):
	"""Initial program: sqrt(1/2 * (1 + 1/sqrt(1 + (2*l/Om)**2 * (sin(kx)**2 + sin(ky)**2))))."""
	# The ratio term is computed first, matching the original operand order.
	ratio_sq = math.pow((2.0 * l) / Om, 2.0)
	sin_sq_sum = math.pow(math.sin(kx), 2.0) + math.pow(math.sin(ky), 2.0)
	radicand = 1.0 + (ratio_sq * sin_sq_sum)
	one_plus_inv = 1.0 + (1.0 / math.sqrt(radicand))
	return math.sqrt((1.0 / 2.0) * one_plus_inv)
# Initial program: sqrt(1/2 * (1 + 1/sqrt(1 + (2*l/Om)^2 * (sin(kx)^2 + sin(ky)^2)))),
# converting each intermediate result to Float64.
function code(l, Om, kx, ky)
	ratio_sq = Float64(Float64(2.0 * l) / Om) ^ 2.0
	sin_sq_sum = Float64((sin(kx) ^ 2.0) + (sin(ky) ^ 2.0))
	radicand = Float64(1.0 + Float64(ratio_sq * sin_sq_sum))
	one_plus_inv = Float64(1.0 + Float64(1.0 / sqrt(radicand)))
	return sqrt(Float64(Float64(1.0 / 2.0) * one_plus_inv))
end
function tmp = code(l, Om, kx, ky)
	% Initial program: sqrt(1/2 * (1 + 1/sqrt(1 + (2*l/Om)^2 * (sin(kx)^2 + sin(ky)^2)))).
	ratio_sq = ((2.0 * l) / Om) ^ 2.0;
	sin_sq_sum = (sin(kx) ^ 2.0) + (sin(ky) ^ 2.0);
	radicand = 1.0 + (ratio_sq * sin_sq_sum);
	one_plus_inv = 1.0 + (1.0 / sqrt(radicand));
	tmp = sqrt((1.0 / 2.0) * one_plus_inv);
end
(* Initial program: Sqrt[1/2 (1 + 1/Sqrt[1 + (2 l/Om)^2 (Sin[kx]^2 + Sin[ky]^2)]]); every intermediate result is wrapped in N[..., $MachinePrecision]. *)
code[l_, Om_, kx_, ky_] := N[Sqrt[N[(N[(1.0 / 2.0), $MachinePrecision] * N[(1.0 + N[(1.0 / N[Sqrt[N[(1.0 + N[(N[Power[N[(N[(2.0 * l), $MachinePrecision] / Om), $MachinePrecision], 2.0], $MachinePrecision] * N[(N[Power[N[Sin[kx], $MachinePrecision], 2.0], $MachinePrecision] + N[Power[N[Sin[ky], $MachinePrecision], 2.0], $MachinePrecision]), $MachinePrecision]), $MachinePrecision]), $MachinePrecision]], $MachinePrecision]), $MachinePrecision]), $MachinePrecision]), $MachinePrecision]], $MachinePrecision]
\begin{array}{l}

\\
\sqrt{\frac{1}{2} \cdot \left(1 + \frac{1}{\sqrt{1 + {\left(\frac{2 \cdot \ell}{Om}\right)}^{2} \cdot \left({\sin kx}^{2} + {\sin ky}^{2}\right)}}\right)}
\end{array}

Alternative 1: 99.5% accurate, 1.1× speedup?

\[\begin{array}{l} ky_m = \left|ky\right| \\ kx_m = \left|kx\right| \\ [l, Om, kx_m, ky_m] = \mathsf{sort}([l, Om, kx_m, ky_m])\\ \\ \sqrt{\mathsf{fma}\left({\left({\left({\left(\mathsf{fma}\left({\left(\frac{\ell}{Om} \cdot \sin ky\_m\right)}^{2}, 4, 1\right)\right)}^{-0.5}\right)}^{-1}\right)}^{-1}, 0.5, 0.5\right)} \end{array} \]
ky_m = (fabs.f64 ky)
kx_m = (fabs.f64 kx)
NOTE: l, Om, kx_m, and ky_m should be sorted in increasing order before calling this function.
; Alternative 1: sqrt(fma(t, 0.5, 0.5)) where
;   t = ((fma(((l/Om) * sin(ky_m))^2, 4, 1))^-0.5)^-1)^-1
; kx_m is accepted as an argument but does not appear in the body.
(FPCore (l Om kx_m ky_m)
 :precision binary64
 (sqrt
  (fma
   (pow
    (pow (pow (fma (pow (* (/ l Om) (sin ky_m)) 2.0) 4.0 1.0) -0.5) -1.0)
    -1.0)
   0.5
   0.5)))
ky_m = fabs(ky);
kx_m = fabs(kx);
assert(l < Om && Om < kx_m && kx_m < ky_m);
double code(double l, double Om, double kx_m, double ky_m) {
	return sqrt(fma(pow(pow(pow(fma(pow(((l / Om) * sin(ky_m)), 2.0), 4.0, 1.0), -0.5), -1.0), -1.0), 0.5, 0.5));
}
ky_m = abs(ky)
kx_m = abs(kx)
l, Om, kx_m, ky_m = sort([l, Om, kx_m, ky_m])
# Alternative 1: sqrt(fma(t, 0.5, 0.5)), t = ((fma(((l/Om)*sin(ky_m))^2, 4, 1)^-0.5)^-1)^-1.
# kx_m is not used by the body.
function code(l, Om, kx_m, ky_m)
	scaled_sin = Float64(Float64(l / Om) * sin(ky_m))
	radicand = fma((scaled_sin ^ 2.0), 4.0, 1.0)
	inv_root = radicand ^ -0.5
	# Two successive ^ -1.0 steps, kept exactly as in the generated expression.
	twice_inverted = (inv_root ^ -1.0) ^ -1.0
	return sqrt(fma(twice_inverted, 0.5, 0.5))
end
ky_m = N[Abs[ky], $MachinePrecision]
kx_m = N[Abs[kx], $MachinePrecision]
NOTE: l, Om, kx_m, and ky_m should be sorted in increasing order before calling this function.
(* Alternative 1: the multiply-add steps are written as a * b + c inside N[..., $MachinePrecision]; kx$95$m is not used by the body. *)
code[l_, Om_, kx$95$m_, ky$95$m_] := N[Sqrt[N[(N[Power[N[Power[N[Power[N[(N[Power[N[(N[(l / Om), $MachinePrecision] * N[Sin[ky$95$m], $MachinePrecision]), $MachinePrecision], 2.0], $MachinePrecision] * 4.0 + 1.0), $MachinePrecision], -0.5], $MachinePrecision], -1.0], $MachinePrecision], -1.0], $MachinePrecision] * 0.5 + 0.5), $MachinePrecision]], $MachinePrecision]
\begin{array}{l}
ky_m = \left|ky\right|
\\
kx_m = \left|kx\right|
\\
[l, Om, kx_m, ky_m] = \mathsf{sort}([l, Om, kx_m, ky_m])\\
\\
\sqrt{\mathsf{fma}\left({\left({\left({\left(\mathsf{fma}\left({\left(\frac{\ell}{Om} \cdot \sin ky\_m\right)}^{2}, 4, 1\right)\right)}^{-0.5}\right)}^{-1}\right)}^{-1}, 0.5, 0.5\right)}
\end{array}
Derivation
  1. Initial program 99.2%

    \[\sqrt{\frac{1}{2} \cdot \left(1 + \frac{1}{\sqrt{1 + {\left(\frac{2 \cdot \ell}{Om}\right)}^{2} \cdot \left({\sin kx}^{2} + {\sin ky}^{2}\right)}}\right)} \]
  2. Add Preprocessing
  3. Taylor expanded in kx around 0

    \[\leadsto \sqrt{\color{blue}{\frac{1}{2} \cdot \left(1 + \sqrt{\frac{1}{1 + 4 \cdot \frac{{\ell}^{2} \cdot {\sin ky}^{2}}{{Om}^{2}}}}\right)}} \]
  4. Step-by-step derivation
    1. +-commutativeN/A

      \[\leadsto \sqrt{\frac{1}{2} \cdot \color{blue}{\left(\sqrt{\frac{1}{1 + 4 \cdot \frac{{\ell}^{2} \cdot {\sin ky}^{2}}{{Om}^{2}}}} + 1\right)}} \]
    2. distribute-rgt-inN/A

      \[\leadsto \sqrt{\color{blue}{\sqrt{\frac{1}{1 + 4 \cdot \frac{{\ell}^{2} \cdot {\sin ky}^{2}}{{Om}^{2}}}} \cdot \frac{1}{2} + 1 \cdot \frac{1}{2}}} \]
    3. metadata-evalN/A

      \[\leadsto \sqrt{\sqrt{\frac{1}{1 + 4 \cdot \frac{{\ell}^{2} \cdot {\sin ky}^{2}}{{Om}^{2}}}} \cdot \frac{1}{2} + \color{blue}{\frac{1}{2}}} \]
    4. lower-fma.f64N/A

      \[\leadsto \sqrt{\color{blue}{\mathsf{fma}\left(\sqrt{\frac{1}{1 + 4 \cdot \frac{{\ell}^{2} \cdot {\sin ky}^{2}}{{Om}^{2}}}}, \frac{1}{2}, \frac{1}{2}\right)}} \]
  5. Applied rewrites80.1%

    \[\leadsto \sqrt{\color{blue}{\mathsf{fma}\left(\sqrt{\frac{1}{\mathsf{fma}\left(4 \cdot \left(\ell \cdot \ell\right), \frac{{\sin ky}^{2}}{Om \cdot Om}, 1\right)}}, 0.5, 0.5\right)}} \]
  6. Applied rewrites96.2%

    \[\leadsto \sqrt{\mathsf{fma}\left(\frac{1}{\sqrt{-\left(\left(-{\left(\sin ky \cdot \frac{\ell \cdot 2}{Om}\right)}^{2}\right) + -1\right)}}, 0.5, 0.5\right)} \]
  7. Applied rewrites96.2%

    \[\leadsto \sqrt{\mathsf{fma}\left(\frac{1}{\frac{1}{{\left(\mathsf{fma}\left({\left(\sin ky \cdot \frac{\ell}{Om}\right)}^{2}, 4, 1\right)\right)}^{-0.5}}}, 0.5, 0.5\right)} \]
  8. Final simplification96.2%

    \[\leadsto \sqrt{\mathsf{fma}\left({\left({\left({\left(\mathsf{fma}\left({\left(\frac{\ell}{Om} \cdot \sin ky\right)}^{2}, 4, 1\right)\right)}^{-0.5}\right)}^{-1}\right)}^{-1}, 0.5, 0.5\right)} \]
  9. Add Preprocessing

Alternative 2: 97.8% accurate, 0.7× speedup?

\[\begin{array}{l} ky_m = \left|ky\right| \\ kx_m = \left|kx\right| \\ [l, Om, kx_m, ky_m] = \mathsf{sort}([l, Om, kx_m, ky_m])\\ \\ \begin{array}{l} \mathbf{if}\;{\left(\sqrt{\left({\sin ky\_m}^{2} + {\sin kx\_m}^{2}\right) \cdot {\left(\frac{2 \cdot \ell}{Om}\right)}^{2} + 1}\right)}^{-1} \leq 0.002:\\ \;\;\;\;\sqrt{\mathsf{fma}\left(\frac{Om}{\ell \cdot \sin ky\_m}, -0.25, 0.5\right)}\\ \mathbf{else}:\\ \;\;\;\;\sqrt{1}\\ \end{array} \end{array} \]
ky_m = (fabs.f64 ky)
kx_m = (fabs.f64 kx)
NOTE: l, Om, kx_m, and ky_m should be sorted in increasing order before calling this function.
; Alternative 2: two regimes selected by the value of
;   1 / sqrt((sin(ky_m)^2 + sin(kx_m)^2) * (2*l/Om)^2 + 1)
;   <= 0.002 : sqrt(fma(Om / (l * sin(ky_m)), -0.25, 0.5))
;   otherwise: sqrt(1)
(FPCore (l Om kx_m ky_m)
 :precision binary64
 (if (<=
      (pow
       (sqrt
        (+
         (*
          (+ (pow (sin ky_m) 2.0) (pow (sin kx_m) 2.0))
          (pow (/ (* 2.0 l) Om) 2.0))
         1.0))
       -1.0)
      0.002)
   (sqrt (fma (/ Om (* l (sin ky_m))) -0.25 0.5))
   (sqrt 1.0)))
ky_m = fabs(ky);
kx_m = fabs(kx);
assert(l < Om && Om < kx_m && kx_m < ky_m);
double code(double l, double Om, double kx_m, double ky_m) {
	double tmp;
	if (pow(sqrt((((pow(sin(ky_m), 2.0) + pow(sin(kx_m), 2.0)) * pow(((2.0 * l) / Om), 2.0)) + 1.0)), -1.0) <= 0.002) {
		tmp = sqrt(fma((Om / (l * sin(ky_m))), -0.25, 0.5));
	} else {
		tmp = sqrt(1.0);
	}
	return tmp;
}
ky_m = abs(ky)
kx_m = abs(kx)
l, Om, kx_m, ky_m = sort([l, Om, kx_m, ky_m])
# Alternative 2: picks one of two approximations based on
# 1 / sqrt((sin(ky_m)^2 + sin(kx_m)^2) * (2*l/Om)^2 + 1).
function code(l, Om, kx_m, ky_m)
	sin_sq_sum = Float64((sin(ky_m) ^ 2.0) + (sin(kx_m) ^ 2.0))
	ratio_sq = Float64(Float64(2.0 * l) / Om) ^ 2.0
	inv_root = sqrt(Float64(Float64(sin_sq_sum * ratio_sq) + 1.0)) ^ -1.0
	if inv_root <= 0.002
		# Small inverse root: sqrt(0.5 - 0.25 * Om / (l * sin(ky_m))).
		return sqrt(fma(Float64(Om / Float64(l * sin(ky_m))), -0.25, 0.5))
	end
	return sqrt(1.0)
end
ky_m = N[Abs[ky], $MachinePrecision]
kx_m = N[Abs[kx], $MachinePrecision]
NOTE: l, Om, kx_m, and ky_m should be sorted in increasing order before calling this function.
(* Alternative 2: If[inverse root <= 0.002, Sqrt[(Om / (l Sin[ky$95$m])) * -0.25 + 0.5], Sqrt[1.0]], with intermediates wrapped in N[..., $MachinePrecision]. *)
code[l_, Om_, kx$95$m_, ky$95$m_] := If[LessEqual[N[Power[N[Sqrt[N[(N[(N[(N[Power[N[Sin[ky$95$m], $MachinePrecision], 2.0], $MachinePrecision] + N[Power[N[Sin[kx$95$m], $MachinePrecision], 2.0], $MachinePrecision]), $MachinePrecision] * N[Power[N[(N[(2.0 * l), $MachinePrecision] / Om), $MachinePrecision], 2.0], $MachinePrecision]), $MachinePrecision] + 1.0), $MachinePrecision]], $MachinePrecision], -1.0], $MachinePrecision], 0.002], N[Sqrt[N[(N[(Om / N[(l * N[Sin[ky$95$m], $MachinePrecision]), $MachinePrecision]), $MachinePrecision] * -0.25 + 0.5), $MachinePrecision]], $MachinePrecision], N[Sqrt[1.0], $MachinePrecision]]
\begin{array}{l}
ky_m = \left|ky\right|
\\
kx_m = \left|kx\right|
\\
[l, Om, kx_m, ky_m] = \mathsf{sort}([l, Om, kx_m, ky_m])\\
\\
\begin{array}{l}
\mathbf{if}\;{\left(\sqrt{\left({\sin ky\_m}^{2} + {\sin kx\_m}^{2}\right) \cdot {\left(\frac{2 \cdot \ell}{Om}\right)}^{2} + 1}\right)}^{-1} \leq 0.002:\\
\;\;\;\;\sqrt{\mathsf{fma}\left(\frac{Om}{\ell \cdot \sin ky\_m}, -0.25, 0.5\right)}\\

\mathbf{else}:\\
\;\;\;\;\sqrt{1}\\


\end{array}
\end{array}
Derivation
  1. Split input into 2 regimes
  2. if (/.f64 #s(literal 1 binary64) (sqrt.f64 (+.f64 #s(literal 1 binary64) (*.f64 (pow.f64 (/.f64 (*.f64 #s(literal 2 binary64) l) Om) #s(literal 2 binary64)) (+.f64 (pow.f64 (sin.f64 kx) #s(literal 2 binary64)) (pow.f64 (sin.f64 ky) #s(literal 2 binary64))))))) < 2e-3

    1. Initial program 100.0%

      \[\sqrt{\frac{1}{2} \cdot \left(1 + \frac{1}{\sqrt{1 + {\left(\frac{2 \cdot \ell}{Om}\right)}^{2} \cdot \left({\sin kx}^{2} + {\sin ky}^{2}\right)}}\right)} \]
    2. Add Preprocessing
    3. Taylor expanded in kx around 0

      \[\leadsto \sqrt{\color{blue}{\frac{1}{2} \cdot \left(1 + \sqrt{\frac{1}{1 + 4 \cdot \frac{{\ell}^{2} \cdot {\sin ky}^{2}}{{Om}^{2}}}}\right)}} \]
    4. Step-by-step derivation
      1. +-commutativeN/A

        \[\leadsto \sqrt{\frac{1}{2} \cdot \color{blue}{\left(\sqrt{\frac{1}{1 + 4 \cdot \frac{{\ell}^{2} \cdot {\sin ky}^{2}}{{Om}^{2}}}} + 1\right)}} \]
      2. distribute-rgt-inN/A

        \[\leadsto \sqrt{\color{blue}{\sqrt{\frac{1}{1 + 4 \cdot \frac{{\ell}^{2} \cdot {\sin ky}^{2}}{{Om}^{2}}}} \cdot \frac{1}{2} + 1 \cdot \frac{1}{2}}} \]
      3. metadata-evalN/A

        \[\leadsto \sqrt{\sqrt{\frac{1}{1 + 4 \cdot \frac{{\ell}^{2} \cdot {\sin ky}^{2}}{{Om}^{2}}}} \cdot \frac{1}{2} + \color{blue}{\frac{1}{2}}} \]
      4. lower-fma.f64N/A

        \[\leadsto \sqrt{\color{blue}{\mathsf{fma}\left(\sqrt{\frac{1}{1 + 4 \cdot \frac{{\ell}^{2} \cdot {\sin ky}^{2}}{{Om}^{2}}}}, \frac{1}{2}, \frac{1}{2}\right)}} \]
    5. Applied rewrites73.6%

      \[\leadsto \sqrt{\color{blue}{\mathsf{fma}\left(\sqrt{\frac{1}{\mathsf{fma}\left(4 \cdot \left(\ell \cdot \ell\right), \frac{{\sin ky}^{2}}{Om \cdot Om}, 1\right)}}, 0.5, 0.5\right)}} \]
    6. Taylor expanded in l around -inf

      \[\leadsto \sqrt{\frac{1}{2} + \color{blue}{\frac{-1}{4} \cdot \frac{Om}{\ell \cdot \sin ky}}} \]
    7. Step-by-step derivation
      1. Applied rewrites90.2%

        \[\leadsto \sqrt{\mathsf{fma}\left(\frac{Om}{\sin ky \cdot \ell}, \color{blue}{-0.25}, 0.5\right)} \]

      if 2e-3 < (/.f64 #s(literal 1 binary64) (sqrt.f64 (+.f64 #s(literal 1 binary64) (*.f64 (pow.f64 (/.f64 (*.f64 #s(literal 2 binary64) l) Om) #s(literal 2 binary64)) (+.f64 (pow.f64 (sin.f64 kx) #s(literal 2 binary64)) (pow.f64 (sin.f64 ky) #s(literal 2 binary64)))))))

      1. Initial program 98.6%

        \[\sqrt{\frac{1}{2} \cdot \left(1 + \frac{1}{\sqrt{1 + {\left(\frac{2 \cdot \ell}{Om}\right)}^{2} \cdot \left({\sin kx}^{2} + {\sin ky}^{2}\right)}}\right)} \]
      2. Add Preprocessing
      3. Taylor expanded in Om around inf

        \[\leadsto \sqrt{\color{blue}{1}} \]
      4. Step-by-step derivation
        1. Applied rewrites97.8%

          \[\leadsto \sqrt{\color{blue}{1}} \]
      5. Recombined 2 regimes into one program.
      6. Final simplification94.5%

        \[\leadsto \begin{array}{l} \mathbf{if}\;{\left(\sqrt{\left({\sin ky}^{2} + {\sin kx}^{2}\right) \cdot {\left(\frac{2 \cdot \ell}{Om}\right)}^{2} + 1}\right)}^{-1} \leq 0.002:\\ \;\;\;\;\sqrt{\mathsf{fma}\left(\frac{Om}{\ell \cdot \sin ky}, -0.25, 0.5\right)}\\ \mathbf{else}:\\ \;\;\;\;\sqrt{1}\\ \end{array} \]
      7. Add Preprocessing

      Alternative 3: 97.7% accurate, 0.9× speedup?

      \[\begin{array}{l} ky_m = \left|ky\right| \\ kx_m = \left|kx\right| \\ [l, Om, kx_m, ky_m] = \mathsf{sort}([l, Om, kx_m, ky_m])\\ \\ \begin{array}{l} \mathbf{if}\;{\left(\sqrt{\left({\sin ky\_m}^{2} + {\sin kx\_m}^{2}\right) \cdot {\left(\frac{2 \cdot \ell}{Om}\right)}^{2} + 1}\right)}^{-1} \leq 0.46:\\ \;\;\;\;\sqrt{0.5}\\ \mathbf{else}:\\ \;\;\;\;\sqrt{1}\\ \end{array} \end{array} \]
      ky_m = (fabs.f64 ky)
      kx_m = (fabs.f64 kx)
      NOTE: l, Om, kx_m, and ky_m should be sorted in increasing order before calling this function.
      ; Alternative 3: two constant regimes selected by the value of
      ;   1 / sqrt((sin(ky_m)^2 + sin(kx_m)^2) * (2*l/Om)^2 + 1)
      ;   <= 0.46  : sqrt(0.5)
      ;   otherwise: sqrt(1)
      (FPCore (l Om kx_m ky_m)
       :precision binary64
       (if (<=
            (pow
             (sqrt
              (+
               (*
                (+ (pow (sin ky_m) 2.0) (pow (sin kx_m) 2.0))
                (pow (/ (* 2.0 l) Om) 2.0))
               1.0))
             -1.0)
            0.46)
         (sqrt 0.5)
         (sqrt 1.0)))
      ky_m = fabs(ky);
      kx_m = fabs(kx);
      assert(l < Om && Om < kx_m && kx_m < ky_m);
      double code(double l, double Om, double kx_m, double ky_m) {
      	double tmp;
      	if (pow(sqrt((((pow(sin(ky_m), 2.0) + pow(sin(kx_m), 2.0)) * pow(((2.0 * l) / Om), 2.0)) + 1.0)), -1.0) <= 0.46) {
      		tmp = sqrt(0.5);
      	} else {
      		tmp = sqrt(1.0);
      	}
      	return tmp;
      }
      
      ky_m = abs(ky)
      kx_m = abs(kx)
      NOTE: l, Om, kx_m, and ky_m should be sorted in increasing order before calling this function.
      ! Alternative 3: returns sqrt(0.5) or sqrt(1.0) depending on
      ! 1 / sqrt((sin(ky_m)**2 + sin(kx_m)**2) * (2*l/om)**2 + 1).
      real(8) function code(l, om, kx_m, ky_m)
          real(8), intent (in) :: l, om, kx_m, ky_m
          real(8) :: sin_sq_sum, radicand, inv_root
          sin_sq_sum = (sin(ky_m) ** 2.0d0) + (sin(kx_m) ** 2.0d0)
          radicand = (sin_sq_sum * (((2.0d0 * l) / om) ** 2.0d0)) + 1.0d0
          inv_root = sqrt(radicand) ** (-1.0d0)
          if (inv_root <= 0.46d0) then
              code = sqrt(0.5d0)
          else
              code = sqrt(1.0d0)
          end if
      end function
      
      ky_m = Math.abs(ky);
      kx_m = Math.abs(kx);
      assert l < Om && Om < kx_m && kx_m < ky_m;
      /** Alternative 3: returns sqrt(0.5) or sqrt(1.0) depending on
       *  1 / sqrt((sin(ky_m)^2 + sin(kx_m)^2) * (2*l/Om)^2 + 1). */
      public static double code(double l, double Om, double kx_m, double ky_m) {
      	final double sinSqSum = Math.pow(Math.sin(ky_m), 2.0) + Math.pow(Math.sin(kx_m), 2.0);
      	final double radicand = (sinSqSum * Math.pow((2.0 * l) / Om, 2.0)) + 1.0;
      	final double invRoot = Math.pow(Math.sqrt(radicand), -1.0);
      	return (invRoot <= 0.46) ? Math.sqrt(0.5) : Math.sqrt(1.0);
      }
      
      ky_m = math.fabs(ky)
      kx_m = math.fabs(kx)
      [l, Om, kx_m, ky_m] = sort([l, Om, kx_m, ky_m])
      def code(l, Om, kx_m, ky_m):
      	"""Alternative 3: return sqrt(0.5) or sqrt(1.0) depending on
      	1 / sqrt((sin(ky_m)**2 + sin(kx_m)**2) * (2*l/Om)**2 + 1)."""
      	# The sine terms are computed before the ratio term, matching the original operand order.
      	sin_sq_sum = math.pow(math.sin(ky_m), 2.0) + math.pow(math.sin(kx_m), 2.0)
      	radicand = (sin_sq_sum * math.pow((2.0 * l) / Om, 2.0)) + 1.0
      	inv_root = math.pow(math.sqrt(radicand), -1.0)
      	return math.sqrt(0.5) if inv_root <= 0.46 else math.sqrt(1.0)
      
      ky_m = abs(ky)
      kx_m = abs(kx)
      l, Om, kx_m, ky_m = sort([l, Om, kx_m, ky_m])
      # Alternative 3: returns sqrt(0.5) or sqrt(1.0) depending on
      # 1 / sqrt((sin(ky_m)^2 + sin(kx_m)^2) * (2*l/Om)^2 + 1).
      function code(l, Om, kx_m, ky_m)
      	sin_sq_sum = Float64((sin(ky_m) ^ 2.0) + (sin(kx_m) ^ 2.0))
      	ratio_sq = Float64(Float64(2.0 * l) / Om) ^ 2.0
      	inv_root = sqrt(Float64(Float64(sin_sq_sum * ratio_sq) + 1.0)) ^ -1.0
      	return inv_root <= 0.46 ? sqrt(0.5) : sqrt(1.0)
      end
      
      ky_m = abs(ky);
      kx_m = abs(kx);
      l, Om, kx_m, ky_m = num2cell(sort([l, Om, kx_m, ky_m])){:}
      function tmp_2 = code(l, Om, kx_m, ky_m)
      	% Alternative 3: returns sqrt(0.5) or sqrt(1.0) depending on
      	% 1 / sqrt((sin(ky_m)^2 + sin(kx_m)^2) * (2*l/Om)^2 + 1).
      	sin_sq_sum = (sin(ky_m) ^ 2.0) + (sin(kx_m) ^ 2.0);
      	radicand = (sin_sq_sum * (((2.0 * l) / Om) ^ 2.0)) + 1.0;
      	if ((sqrt(radicand) ^ -1.0) <= 0.46)
      		tmp_2 = sqrt(0.5);
      	else
      		tmp_2 = sqrt(1.0);
      	end
      end
      
      ky_m = N[Abs[ky], $MachinePrecision]
      kx_m = N[Abs[kx], $MachinePrecision]
      NOTE: l, Om, kx_m, and ky_m should be sorted in increasing order before calling this function.
      (* Alternative 3: If[inverse root <= 0.46, Sqrt[0.5], Sqrt[1.0]], with intermediates wrapped in N[..., $MachinePrecision]. *)
      code[l_, Om_, kx$95$m_, ky$95$m_] := If[LessEqual[N[Power[N[Sqrt[N[(N[(N[(N[Power[N[Sin[ky$95$m], $MachinePrecision], 2.0], $MachinePrecision] + N[Power[N[Sin[kx$95$m], $MachinePrecision], 2.0], $MachinePrecision]), $MachinePrecision] * N[Power[N[(N[(2.0 * l), $MachinePrecision] / Om), $MachinePrecision], 2.0], $MachinePrecision]), $MachinePrecision] + 1.0), $MachinePrecision]], $MachinePrecision], -1.0], $MachinePrecision], 0.46], N[Sqrt[0.5], $MachinePrecision], N[Sqrt[1.0], $MachinePrecision]]
      
      \begin{array}{l}
      ky_m = \left|ky\right|
      \\
      kx_m = \left|kx\right|
      \\
      [l, Om, kx_m, ky_m] = \mathsf{sort}([l, Om, kx_m, ky_m])\\
      \\
      \begin{array}{l}
      \mathbf{if}\;{\left(\sqrt{\left({\sin ky\_m}^{2} + {\sin kx\_m}^{2}\right) \cdot {\left(\frac{2 \cdot \ell}{Om}\right)}^{2} + 1}\right)}^{-1} \leq 0.46:\\
      \;\;\;\;\sqrt{0.5}\\
      
      \mathbf{else}:\\
      \;\;\;\;\sqrt{1}\\
      
      
      \end{array}
      \end{array}
      
      Derivation
      1. Split input into 2 regimes
      2. if (/.f64 #s(literal 1 binary64) (sqrt.f64 (+.f64 #s(literal 1 binary64) (*.f64 (pow.f64 (/.f64 (*.f64 #s(literal 2 binary64) l) Om) #s(literal 2 binary64)) (+.f64 (pow.f64 (sin.f64 kx) #s(literal 2 binary64)) (pow.f64 (sin.f64 ky) #s(literal 2 binary64))))))) < 0.46000000000000002

        1. Initial program 100.0%

          \[\sqrt{\frac{1}{2} \cdot \left(1 + \frac{1}{\sqrt{1 + {\left(\frac{2 \cdot \ell}{Om}\right)}^{2} \cdot \left({\sin kx}^{2} + {\sin ky}^{2}\right)}}\right)} \]
        2. Add Preprocessing
        3. Taylor expanded in Om around 0

          \[\leadsto \sqrt{\color{blue}{\frac{1}{2}}} \]
        4. Step-by-step derivation
          1. Applied rewrites98.5%

            \[\leadsto \sqrt{\color{blue}{0.5}} \]

          if 0.46000000000000002 < (/.f64 #s(literal 1 binary64) (sqrt.f64 (+.f64 #s(literal 1 binary64) (*.f64 (pow.f64 (/.f64 (*.f64 #s(literal 2 binary64) l) Om) #s(literal 2 binary64)) (+.f64 (pow.f64 (sin.f64 kx) #s(literal 2 binary64)) (pow.f64 (sin.f64 ky) #s(literal 2 binary64)))))))

          1. Initial program 98.6%

            \[\sqrt{\frac{1}{2} \cdot \left(1 + \frac{1}{\sqrt{1 + {\left(\frac{2 \cdot \ell}{Om}\right)}^{2} \cdot \left({\sin kx}^{2} + {\sin ky}^{2}\right)}}\right)} \]
          2. Add Preprocessing
          3. Taylor expanded in Om around inf

            \[\leadsto \sqrt{\color{blue}{1}} \]
          4. Step-by-step derivation
            1. Applied rewrites97.8%

              \[\leadsto \sqrt{\color{blue}{1}} \]
          5. Recombined 2 regimes into one program.
          6. Final simplification98.1%

            \[\leadsto \begin{array}{l} \mathbf{if}\;{\left(\sqrt{\left({\sin ky}^{2} + {\sin kx}^{2}\right) \cdot {\left(\frac{2 \cdot \ell}{Om}\right)}^{2} + 1}\right)}^{-1} \leq 0.46:\\ \;\;\;\;\sqrt{0.5}\\ \mathbf{else}:\\ \;\;\;\;\sqrt{1}\\ \end{array} \]
          7. Add Preprocessing

          Alternative 4: 99.5% accurate, 2.3× speedup?

          \[\begin{array}{l} ky_m = \left|ky\right| \\ kx_m = \left|kx\right| \\ [l, Om, kx_m, ky_m] = \mathsf{sort}([l, Om, kx_m, ky_m])\\ \\ \sqrt{\frac{0.5}{\sqrt{\mathsf{fma}\left({\left(\frac{\ell}{Om} \cdot \sin ky\_m\right)}^{2}, 4, 1\right)}} + 0.5} \end{array} \]
          ky_m = (fabs.f64 ky)
          kx_m = (fabs.f64 kx)
          NOTE: l, Om, kx_m, and ky_m should be sorted in increasing order before calling this function.
          ; Alternative 4: sqrt(0.5 / sqrt(fma(((l/Om) * sin(ky_m))^2, 4, 1)) + 0.5)
          ; kx_m is accepted as an argument but does not appear in the body.
          (FPCore (l Om kx_m ky_m)
           :precision binary64
           (sqrt (+ (/ 0.5 (sqrt (fma (pow (* (/ l Om) (sin ky_m)) 2.0) 4.0 1.0))) 0.5)))
          ky_m = fabs(ky);
          kx_m = fabs(kx);
          assert(l < Om && Om < kx_m && kx_m < ky_m);
          double code(double l, double Om, double kx_m, double ky_m) {
          	return sqrt(((0.5 / sqrt(fma(pow(((l / Om) * sin(ky_m)), 2.0), 4.0, 1.0))) + 0.5));
          }
          
          ky_m = abs(ky)
          kx_m = abs(kx)
          l, Om, kx_m, ky_m = sort([l, Om, kx_m, ky_m])
          # Alternative 4: sqrt(0.5 / sqrt(fma(((l/Om)*sin(ky_m))^2, 4, 1)) + 0.5).
          # kx_m is not used by the body.
          function code(l, Om, kx_m, ky_m)
          	scaled_sin = Float64(Float64(l / Om) * sin(ky_m))
          	radicand = fma((scaled_sin ^ 2.0), 4.0, 1.0)
          	half_over_root = Float64(0.5 / sqrt(radicand))
          	return sqrt(Float64(half_over_root + 0.5))
          end
          
          ky_m = N[Abs[ky], $MachinePrecision]
          kx_m = N[Abs[kx], $MachinePrecision]
          NOTE: l, Om, kx_m, and ky_m should be sorted in increasing order before calling this function.
          (* Alternative 4: Sqrt[0.5 / Sqrt[((l/Om) Sin[ky$95$m])^2 * 4.0 + 1.0] + 0.5], with intermediates wrapped in N[..., $MachinePrecision]; kx$95$m is not used by the body. *)
          code[l_, Om_, kx$95$m_, ky$95$m_] := N[Sqrt[N[(N[(0.5 / N[Sqrt[N[(N[Power[N[(N[(l / Om), $MachinePrecision] * N[Sin[ky$95$m], $MachinePrecision]), $MachinePrecision], 2.0], $MachinePrecision] * 4.0 + 1.0), $MachinePrecision]], $MachinePrecision]), $MachinePrecision] + 0.5), $MachinePrecision]], $MachinePrecision]
          
          \begin{array}{l}
          ky_m = \left|ky\right|
          \\
          kx_m = \left|kx\right|
          \\
          [l, Om, kx_m, ky_m] = \mathsf{sort}([l, Om, kx_m, ky_m])\\
          \\
          \sqrt{\frac{0.5}{\sqrt{\mathsf{fma}\left({\left(\frac{\ell}{Om} \cdot \sin ky\_m\right)}^{2}, 4, 1\right)}} + 0.5}
          \end{array}
          
          Derivation
          1. Initial program 99.2%

            \[\sqrt{\frac{1}{2} \cdot \left(1 + \frac{1}{\sqrt{1 + {\left(\frac{2 \cdot \ell}{Om}\right)}^{2} \cdot \left({\sin kx}^{2} + {\sin ky}^{2}\right)}}\right)} \]
          2. Add Preprocessing
          3. Taylor expanded in kx around 0

            \[\leadsto \sqrt{\color{blue}{\frac{1}{2} \cdot \left(1 + \sqrt{\frac{1}{1 + 4 \cdot \frac{{\ell}^{2} \cdot {\sin ky}^{2}}{{Om}^{2}}}}\right)}} \]
          4. Step-by-step derivation
            1. +-commutativeN/A

              \[\leadsto \sqrt{\frac{1}{2} \cdot \color{blue}{\left(\sqrt{\frac{1}{1 + 4 \cdot \frac{{\ell}^{2} \cdot {\sin ky}^{2}}{{Om}^{2}}}} + 1\right)}} \]
            2. distribute-rgt-inN/A

              \[\leadsto \sqrt{\color{blue}{\sqrt{\frac{1}{1 + 4 \cdot \frac{{\ell}^{2} \cdot {\sin ky}^{2}}{{Om}^{2}}}} \cdot \frac{1}{2} + 1 \cdot \frac{1}{2}}} \]
            3. metadata-evalN/A

              \[\leadsto \sqrt{\sqrt{\frac{1}{1 + 4 \cdot \frac{{\ell}^{2} \cdot {\sin ky}^{2}}{{Om}^{2}}}} \cdot \frac{1}{2} + \color{blue}{\frac{1}{2}}} \]
            4. lower-fma.f64N/A

              \[\leadsto \sqrt{\color{blue}{\mathsf{fma}\left(\sqrt{\frac{1}{1 + 4 \cdot \frac{{\ell}^{2} \cdot {\sin ky}^{2}}{{Om}^{2}}}}, \frac{1}{2}, \frac{1}{2}\right)}} \]
          5. Applied rewrites80.1%

            \[\leadsto \sqrt{\color{blue}{\mathsf{fma}\left(\sqrt{\frac{1}{\mathsf{fma}\left(4 \cdot \left(\ell \cdot \ell\right), \frac{{\sin ky}^{2}}{Om \cdot Om}, 1\right)}}, 0.5, 0.5\right)}} \]
          6. Applied rewrites96.2%

            \[\leadsto \sqrt{\mathsf{fma}\left(\frac{1}{\sqrt{-\left(\left(-{\left(\sin ky \cdot \frac{\ell \cdot 2}{Om}\right)}^{2}\right) + -1\right)}}, 0.5, 0.5\right)} \]
          7. Step-by-step derivation
            1. Applied rewrites96.2%

              \[\leadsto \sqrt{\frac{0.5}{\sqrt{\mathsf{fma}\left({\left(\sin ky \cdot \frac{\ell}{Om}\right)}^{2}, 4, 1\right)}} + \color{blue}{0.5}} \]
            2. Final simplification96.2%

              \[\leadsto \sqrt{\frac{0.5}{\sqrt{\mathsf{fma}\left({\left(\frac{\ell}{Om} \cdot \sin ky\right)}^{2}, 4, 1\right)}} + 0.5} \]
            3. Add Preprocessing

            Alternative 5: 56.3% accurate, 52.8× speedup?

            \[\begin{array}{l} ky_m = \left|ky\right| \\ kx_m = \left|kx\right| \\ [l, Om, kx_m, ky_m] = \mathsf{sort}([l, Om, kx_m, ky_m])\\ \\ \sqrt{0.5} \end{array} \]
            ky_m = (fabs.f64 ky)
            kx_m = (fabs.f64 kx)
            NOTE: l, Om, kx_m, and ky_m should be sorted in increasing order before calling this function.
            ; Alternative 5: the constant sqrt(0.5); none of the four arguments appear in the body.
            (FPCore (l Om kx_m ky_m) :precision binary64 (sqrt 0.5))
            ky_m = fabs(ky);
            kx_m = fabs(kx);
            assert(l < Om && Om < kx_m && kx_m < ky_m);
            double code(double l, double Om, double kx_m, double ky_m) {
            	return sqrt(0.5);
            }
            
            ky_m = abs(ky)
            kx_m = abs(kx)
            NOTE: l, Om, kx_m, and ky_m should be sorted in increasing order before calling this function.
            ! Alternative 5: returns the constant sqrt(0.5); the arguments are not used.
            real(8) function code(l, om, kx_m, ky_m)
                real(8), intent (in) :: l, om, kx_m, ky_m
                real(8) :: half
                half = 0.5d0
                code = sqrt(half)
            end function
            
            ky_m = Math.abs(ky);
            kx_m = Math.abs(kx);
            assert l < Om && Om < kx_m && kx_m < ky_m;
            /** Alternative 5: returns the constant sqrt(0.5); the arguments are not used. */
            public static double code(double l, double Om, double kx_m, double ky_m) {
            	final double half = 0.5;
            	return Math.sqrt(half);
            }
            
            ky_m = math.fabs(ky)
            kx_m = math.fabs(kx)
            [l, Om, kx_m, ky_m] = sort([l, Om, kx_m, ky_m])
            def code(l, Om, kx_m, ky_m):
            	"""Alternative 5: return the constant sqrt(0.5); the arguments are not used."""
            	half = 0.5
            	return math.sqrt(half)
            
            ky_m = abs(ky)
            kx_m = abs(kx)
            l, Om, kx_m, ky_m = sort([l, Om, kx_m, ky_m])
            # Alternative 5: returns the constant sqrt(0.5); the arguments are not used.
            function code(l, Om, kx_m, ky_m)
            	half = 0.5
            	return sqrt(half)
            end
            
            ky_m = abs(ky);
            kx_m = abs(kx);
            l, Om, kx_m, ky_m = num2cell(sort([l, Om, kx_m, ky_m])){:}
            function tmp = code(l, Om, kx_m, ky_m)
            	% Alternative 5: returns the constant sqrt(0.5); the arguments are not used.
            	half = 0.5;
            	tmp = sqrt(half);
            end
            
            ky_m = N[Abs[ky], $MachinePrecision]
            kx_m = N[Abs[kx], $MachinePrecision]
            NOTE: l, Om, kx_m, and ky_m should be sorted in increasing order before calling this function.
            (* Alternative 5: the constant Sqrt[0.5] at $MachinePrecision; none of the arguments are used. *)
            code[l_, Om_, kx$95$m_, ky$95$m_] := N[Sqrt[0.5], $MachinePrecision]
            
            \begin{array}{l}
            ky_m = \left|ky\right|
            \\
            kx_m = \left|kx\right|
            \\
            [l, Om, kx_m, ky_m] = \mathsf{sort}([l, Om, kx_m, ky_m])\\
            \\
            \sqrt{0.5}
            \end{array}
            
            Derivation
            1. Initial program 99.2%

              \[\sqrt{\frac{1}{2} \cdot \left(1 + \frac{1}{\sqrt{1 + {\left(\frac{2 \cdot \ell}{Om}\right)}^{2} \cdot \left({\sin kx}^{2} + {\sin ky}^{2}\right)}}\right)} \]
            2. Add Preprocessing
            3. Taylor expanded in Om around 0

              \[\leadsto \sqrt{\color{blue}{\frac{1}{2}}} \]
            4. Step-by-step derivation
              1. Applied rewrites53.7%

                \[\leadsto \sqrt{\color{blue}{0.5}} \]
              2. Add Preprocessing

              Reproduce

              ?
              herbie shell --seed 2024271 
              ; Benchmark input for reproduction:
              ;   sqrt(1/2 * (1 + 1 / sqrt(1 + (2*l/Om)^2 * (sin(kx)^2 + sin(ky)^2))))
              (FPCore (l Om kx ky)
                :name "Toniolo and Linder, Equation (3a)"
                :precision binary64
                (sqrt (* (/ 1.0 2.0) (+ 1.0 (/ 1.0 (sqrt (+ 1.0 (* (pow (/ (* 2.0 l) Om) 2.0) (+ (pow (sin kx) 2.0) (pow (sin ky) 2.0))))))))))