Result for Beckmann Sample, near normal, slope

Specification

\[\left(\left(cosTheta\_i > 0.9999 \land cosTheta\_i \leq 1\right) \land \left(2.328306437 \cdot 10^{-10} \leq u1 \land u1 \leq 1\right)\right) \land \left(2.328306437 \cdot 10^{-10} \leq u2 \land u2 \leq 1\right)\]

\[\sqrt{-\log \left(1 - u1\right)} \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \]

; Specification for "Beckmann Sample, near normal, slope":
;   sqrt(-log(1 - u1)) * cos((2 * PI) * u2), with every operation in binary32.
; Precondition: 0.9999 < cosTheta_i <= 1, and u1, u2 each in [2.328306437e-10, 1].
; cosTheta_i is constrained by :pre but is not used in the body expression.
(FPCore (cosTheta_i u1 u2)
  :precision binary32
  :pre (and (and (and (> cosTheta_i 0.9999) (<= cosTheta_i 1.0))
          (and (<= 2.328306437e-10 u1) (<= u1 1.0)))
     (and (<= 2.328306437e-10 u2) (<= u2 1.0)))
  (* (sqrt (- (log (- 1.0 u1)))) (cos (* (* 2.0 PI) u2))))

/*
 * Specification in C: sqrt(-log(1 - u1)) * cos((2*pi) * u2), computed in float.
 * cosTheta_i is part of the signature but is not read by the body.
 */
float code(float cosTheta_i, float u1, float u2) {
	const float radius = sqrtf(-logf(1.0f - u1));
	const float angle = (2.0f * ((float) M_PI)) * u2;
	return radius * cosf(angle);
}

# Specification in Julia: sqrt(-log(1 - u1)) * cos((2*pi) * u2), rounded to Float32
# after every operation. cosTheta_i is accepted but never read.
function code(cosTheta_i, u1, u2)
	radicand = Float32(-log(Float32(1.0f0 - u1)))
	angle = Float32(Float32(2.0f0 * Float32(pi)) * u2)
	return Float32(sqrt(radicand) * cos(angle))
end

function tmp = code(cosTheta_i, u1, u2)
	% Specification in MATLAB: sqrt(-log(1 - u1)) * cos((2*pi) * u2) with
	% single-precision constants. cosTheta_i is accepted but never read.
	radius = sqrt(-log(single(1.0) - u1));
	angle = (single(2.0) * single(pi)) * u2;
	tmp = radius * cos(angle);
end

\sqrt{-\log \left(1 - u1\right)} \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right)

Initial Program: 57.2% accurate, 1.0× speedup?

\[\sqrt{-\log \left(1 - u1\right)} \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \]

; Initial program (identical to the specification):
;   sqrt(-log(1 - u1)) * cos((2 * PI) * u2), with every operation in binary32.
; Precondition: 0.9999 < cosTheta_i <= 1, and u1, u2 each in [2.328306437e-10, 1].
; cosTheta_i is constrained by :pre but is not used in the body expression.
(FPCore (cosTheta_i u1 u2)
  :precision binary32
  :pre (and (and (and (> cosTheta_i 0.9999) (<= cosTheta_i 1.0))
          (and (<= 2.328306437e-10 u1) (<= u1 1.0)))
     (and (<= 2.328306437e-10 u2) (<= u2 1.0)))
  (* (sqrt (- (log (- 1.0 u1)))) (cos (* (* 2.0 PI) u2))))

/*
 * Initial program in C: sqrt(-log(1 - u1)) * cos((2*pi) * u2), computed in float.
 * cosTheta_i is part of the signature but is not read by the body.
 */
float code(float cosTheta_i, float u1, float u2) {
	const float radius = sqrtf(-logf(1.0f - u1));
	const float angle = (2.0f * ((float) M_PI)) * u2;
	return radius * cosf(angle);
}

# Initial program in Julia: sqrt(-log(1 - u1)) * cos((2*pi) * u2), rounded to Float32
# after every operation. cosTheta_i is accepted but never read.
function code(cosTheta_i, u1, u2)
	radicand = Float32(-log(Float32(1.0f0 - u1)))
	angle = Float32(Float32(2.0f0 * Float32(pi)) * u2)
	return Float32(sqrt(radicand) * cos(angle))
end

function tmp = code(cosTheta_i, u1, u2)
	% Initial program in MATLAB: sqrt(-log(1 - u1)) * cos((2*pi) * u2) with
	% single-precision constants. cosTheta_i is accepted but never read.
	radius = sqrt(-log(single(1.0) - u1));
	angle = (single(2.0) * single(pi)) * u2;
	tmp = radius * cos(angle);
end

\sqrt{-\log \left(1 - u1\right)} \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right)

Alternative 1: 99.2% accurate, 0.9× speedup?

\[\sqrt{-\mathsf{log1p}\left(-u1\right)} \cdot \sin \left(\mathsf{fma}\left(-2, u2, 0.5\right) \cdot \pi\right) \]

; Alternative 1: sqrt(-log1p(-u1)) * sin(fma(-2, u2, 0.5) * PI), in binary32.
; Relative to the initial program, log(1 - u1) is written as log1p(-u1) and the
; cosine is written as a sine of the shifted angle (0.5 - 2*u2) * PI.
; cosTheta_i is constrained by :pre but is not used in the body expression.
(FPCore (cosTheta_i u1 u2)
  :precision binary32
  :pre (and (and (and (> cosTheta_i 0.9999) (<= cosTheta_i 1.0))
          (and (<= 2.328306437e-10 u1) (<= u1 1.0)))
     (and (<= 2.328306437e-10 u2) (<= u2 1.0)))
  (* (sqrt (- (log1p (- u1)))) (sin (* (fma -2.0 u2 0.5) PI))))

/*
 * Alternative 1 in C: sqrt(-log1p(-u1)) * sin(fma(-2, u2, 0.5) * pi), in float.
 * cosTheta_i is part of the signature but is not read by the body.
 */
float code(float cosTheta_i, float u1, float u2) {
	const float radius = sqrtf(-log1pf(-u1));
	const float phase = fmaf(-2.0f, u2, 0.5f) * ((float) M_PI);
	return radius * sinf(phase);
}

# Alternative 1 in Julia: sqrt(-log1p(-u1)) * sin(fma(-2, u2, 0.5) * pi), rounded to
# Float32 after every operation. cosTheta_i is accepted but never read.
function code(cosTheta_i, u1, u2)
	radicand = Float32(-log1p(Float32(-u1)))
	phase = Float32(fma(-2.0f0, u2, 0.5f0) * Float32(pi))
	return Float32(sqrt(radicand) * sin(phase))
end

\sqrt{-\mathsf{log1p}\left(-u1\right)} \cdot \sin \left(\mathsf{fma}\left(-2, u2, 0.5\right) \cdot \pi\right)

Derivation

Initial program 57.2%
\[\sqrt{-\log \left(1 - u1\right)} \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \]
Step-by-step derivation
Applied rewrites: 99.1%
\[\leadsto \sqrt{-\mathsf{log1p}\left(-u1\right)} \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \]
Step-by-step derivation
Applied rewrites: 99.2%
\[\leadsto \sqrt{-\mathsf{log1p}\left(-u1\right)} \cdot \sin \left(\mathsf{fma}\left(-2, u2, 0.5\right) \cdot \pi\right) \]
Add Preprocessing

Alternative 2: 99.1% accurate, 1.0× speedup?

\[\sqrt{-\mathsf{log1p}\left(-u1\right)} \cdot \cos \left(6.2831854820251465 \cdot u2\right) \]

; Alternative 2: sqrt(-log1p(-u1)) * cos(6.2831854820251465 * u2), in binary32.
; The literal 6.2831854820251465 is the evaluated constant that replaces (2 * PI);
; log(1 - u1) is written as log1p(-u1).
; cosTheta_i is constrained by :pre but is not used in the body expression.
(FPCore (cosTheta_i u1 u2)
  :precision binary32
  :pre (and (and (and (> cosTheta_i 0.9999) (<= cosTheta_i 1.0))
          (and (<= 2.328306437e-10 u1) (<= u1 1.0)))
     (and (<= 2.328306437e-10 u2) (<= u2 1.0)))
  (* (sqrt (- (log1p (- u1)))) (cos (* 6.2831854820251465 u2))))

float code(float cosTheta_i, float u1, float u2) {
	return sqrtf(-log1pf(-u1)) * cosf((6.2831854820251465f * u2));
}

# Alternative 2 in Julia: sqrt(-log1p(-u1)) * cos(6.2831854820251465 * u2), rounded to
# Float32 after every operation. cosTheta_i is accepted but never read.
function code(cosTheta_i, u1, u2)
	radicand = Float32(-log1p(Float32(-u1)))
	angle = Float32(Float32(6.2831854820251465) * u2)
	return Float32(sqrt(radicand) * cos(angle))
end

\sqrt{-\mathsf{log1p}\left(-u1\right)} \cdot \cos \left(6.2831854820251465 \cdot u2\right)

Derivation

Initial program 57.2%
\[\sqrt{-\log \left(1 - u1\right)} \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \]
Evaluated real constant: 57.2%
\[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \cos \left(6.2831854820251465 \cdot u2\right) \]
Step-by-step derivation
Applied rewrites: 99.1%
\[\leadsto \sqrt{-\mathsf{log1p}\left(-u1\right)} \cdot \cos \left(6.2831854820251465 \cdot u2\right) \]
Add Preprocessing

Alternative 3: 97.3% accurate, 0.9× speedup?

\[\begin{array}{l} t_0 := \cos \left(6.2831854820251465 \cdot u2\right)\\ \mathbf{if}\;1 - u1 \leq 0.9976000189781189:\\ \;\;\;\;\sqrt{-\log \left(1 - u1\right)} \cdot t\_0\\ \mathbf{else}:\\ \;\;\;\;\sqrt{\mathsf{fma}\left(0.5, u1, 1\right) \cdot u1} \cdot t\_0\\ \end{array} \]

; Alternative 3: two regimes selected by the value of (1 - u1), in binary32.
;   1 - u1 <= 0.9976000189781189 : sqrt(-log(1 - u1)) * t_0
;   otherwise                    : sqrt(fma(0.5, u1, 1) * u1) * t_0
; where t_0 = cos(6.2831854820251465 * u2). The second radicand is the expansion
; u1 * (1 + 0.5 * u1) of -log(1 - u1) around u1 = 0.
; cosTheta_i is constrained by :pre but is not used in the body expression.
(FPCore (cosTheta_i u1 u2)
  :precision binary32
  :pre (and (and (and (> cosTheta_i 0.9999) (<= cosTheta_i 1.0))
          (and (<= 2.328306437e-10 u1) (<= u1 1.0)))
     (and (<= 2.328306437e-10 u2) (<= u2 1.0)))
  (let* ((t_0 (cos (* 6.2831854820251465 u2))))
  (if (<= (- 1.0 u1) 0.9976000189781189)
    (* (sqrt (- (log (- 1.0 u1)))) t_0)
    (* (sqrt (* (fma 0.5 u1 1.0) u1)) t_0))))

float code(float cosTheta_i, float u1, float u2) {
	float t_0 = cosf((6.2831854820251465f * u2));
	float tmp;
	if ((1.0f - u1) <= 0.9976000189781189f) {
		tmp = sqrtf(-logf((1.0f - u1))) * t_0;
	} else {
		tmp = sqrtf((fmaf(0.5f, u1, 1.0f) * u1)) * t_0;
	}
	return tmp;
}

# Alternative 3 in Julia. The radius is sqrt(-log(1 - u1)) when
# 1 - u1 <= 0.9976000189781189 and sqrt(fma(0.5, u1, 1) * u1) otherwise; either way it
# is multiplied by cos(6.2831854820251465 * u2). cosTheta_i is accepted but never read.
function code(cosTheta_i, u1, u2)
	azimuth_cos = cos(Float32(Float32(6.2831854820251465) * u2))
	one_minus_u1 = Float32(1.0f0 - u1)
	if one_minus_u1 <= Float32(0.9976000189781189)
		return Float32(sqrt(Float32(-log(one_minus_u1))) * azimuth_cos)
	end
	return Float32(sqrt(Float32(fma(0.5f0, u1, 1.0f0) * u1)) * azimuth_cos)
end

\begin{array}{l}
t_0 := \cos \left(6.2831854820251465 \cdot u2\right)\\
\mathbf{if}\;1 - u1 \leq 0.9976000189781189:\\
\;\;\;\;\sqrt{-\log \left(1 - u1\right)} \cdot t\_0\\

\mathbf{else}:\\
\;\;\;\;\sqrt{\mathsf{fma}\left(0.5, u1, 1\right) \cdot u1} \cdot t\_0\\


\end{array}

Derivation

Split input into 2 regimes
if (-.f32 #s(literal 1 binary32) u1) < 0.997600019
1. Initial program 57.2%
  \[\sqrt{-\log \left(1 - u1\right)} \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \]
2. Evaluated real constant: 57.2%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \cos \left(6.2831854820251465 \cdot u2\right) \]
if 0.997600019 < (-.f32 #s(literal 1 binary32) u1)
1. Initial program 57.2%
  \[\sqrt{-\log \left(1 - u1\right)} \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \]
2. Evaluated real constant: 57.2%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \cos \left(6.2831854820251465 \cdot u2\right) \]
3. Taylor expanded in u1 around 0
  \[\leadsto \sqrt{u1 \cdot \left(1 + \frac{1}{2} \cdot u1\right)} \cdot \cos \left(6.2831854820251465 \cdot u2\right) \]
4. Step-by-step derivation
5. Applied rewrites: 88.3%
  \[\leadsto \sqrt{u1 \cdot \left(1 + 0.5 \cdot u1\right)} \cdot \cos \left(6.2831854820251465 \cdot u2\right) \]
6. Step-by-step derivation
7. Applied rewrites: 88.3%
  \[\leadsto \sqrt{\mathsf{fma}\left(0.5, u1, 1\right) \cdot u1} \cdot \cos \left(6.2831854820251465 \cdot u2\right) \]
Recombined 2 regimes into one program.
Add Preprocessing

Alternative 4: 96.8% accurate, 0.6× speedup?

\[\begin{array}{l} \mathbf{if}\;\cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \leq 0.9980000257492065:\\ \;\;\;\;\sqrt{\mathsf{fma}\left(0.5, u1, 1\right) \cdot u1} \cdot \cos \left(6.2831854820251465 \cdot u2\right)\\ \mathbf{else}:\\ \;\;\;\;\sqrt{-\mathsf{log1p}\left(-u1\right)} \cdot \mathsf{fma}\left(\left(\left(-2 \cdot u2\right) \cdot u2\right) \cdot \pi, \pi, 1\right)\\ \end{array} \]

; Alternative 4: two regimes selected by cos((2 * PI) * u2), in binary32.
;   cos(...) <= 0.9980000257492065 : sqrt(fma(0.5, u1, 1) * u1) * cos(6.2831854820251465 * u2)
;   otherwise                      : sqrt(-log1p(-u1)) * fma(((-2 * u2) * u2) * PI, PI, 1)
; The second branch replaces the cosine by its expansion 1 - 2 * u2^2 * PI^2 around u2 = 0.
; cosTheta_i is constrained by :pre but is not used in the body expression.
(FPCore (cosTheta_i u1 u2)
  :precision binary32
  :pre (and (and (and (> cosTheta_i 0.9999) (<= cosTheta_i 1.0))
          (and (<= 2.328306437e-10 u1) (<= u1 1.0)))
     (and (<= 2.328306437e-10 u2) (<= u2 1.0)))
  (if (<= (cos (* (* 2.0 PI) u2)) 0.9980000257492065)
  (* (sqrt (* (fma 0.5 u1 1.0) u1)) (cos (* 6.2831854820251465 u2)))
  (*
   (sqrt (- (log1p (- u1))))
   (fma (* (* (* -2.0 u2) u2) PI) PI 1.0))))

/*
 * Alternative 4 in C. When cos((2*pi) * u2) <= 0.9980000257492065 the result is
 * sqrt(fma(0.5, u1, 1) * u1) * cos(6.2831854820251465 * u2); otherwise it is
 * sqrt(-log1p(-u1)) * fma(((-2 * u2) * u2) * pi, pi, 1).
 * cosTheta_i is part of the signature but is not read by the body.
 */
float code(float cosTheta_i, float u1, float u2) {
	if (cosf((2.0f * ((float) M_PI)) * u2) <= 0.9980000257492065f) {
		const float approx_radius = sqrtf(fmaf(0.5f, u1, 1.0f) * u1);
		return approx_radius * cosf(6.2831854820251465f * u2);
	}
	const float cos_series = fmaf(((-2.0f * u2) * u2) * ((float) M_PI), (float) M_PI, 1.0f);
	return sqrtf(-log1pf(-u1)) * cos_series;
}

# Alternative 4 in Julia. When cos((2*pi) * u2) <= 0.9980000257492065 the result is
# sqrt(fma(0.5, u1, 1) * u1) * cos(6.2831854820251465 * u2); otherwise it is
# sqrt(-log1p(-u1)) * fma(((-2 * u2) * u2) * pi, pi, 1).
# cosTheta_i is accepted but never read.
function code(cosTheta_i, u1, u2)
	pi32 = Float32(pi)
	if cos(Float32(Float32(2.0f0 * pi32) * u2)) <= Float32(0.9980000257492065)
		approx_radius = sqrt(Float32(fma(0.5f0, u1, 1.0f0) * u1))
		return Float32(approx_radius * cos(Float32(Float32(6.2831854820251465) * u2)))
	end
	cos_series = fma(Float32(Float32(Float32(-2.0f0 * u2) * u2) * pi32), pi32, 1.0f0)
	return Float32(sqrt(Float32(-log1p(Float32(-u1)))) * cos_series)
end

\begin{array}{l}
\mathbf{if}\;\cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \leq 0.9980000257492065:\\
\;\;\;\;\sqrt{\mathsf{fma}\left(0.5, u1, 1\right) \cdot u1} \cdot \cos \left(6.2831854820251465 \cdot u2\right)\\

\mathbf{else}:\\
\;\;\;\;\sqrt{-\mathsf{log1p}\left(-u1\right)} \cdot \mathsf{fma}\left(\left(\left(-2 \cdot u2\right) \cdot u2\right) \cdot \pi, \pi, 1\right)\\


\end{array}

Derivation

Split input into 2 regimes
if (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2)) < 0.998000026
1. Initial program 57.2%
  \[\sqrt{-\log \left(1 - u1\right)} \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \]
2. Evaluated real constant: 57.2%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \cos \left(6.2831854820251465 \cdot u2\right) \]
3. Taylor expanded in u1 around 0
  \[\leadsto \sqrt{u1 \cdot \left(1 + \frac{1}{2} \cdot u1\right)} \cdot \cos \left(6.2831854820251465 \cdot u2\right) \]
4. Step-by-step derivation
5. Applied rewrites: 88.3%
  \[\leadsto \sqrt{u1 \cdot \left(1 + 0.5 \cdot u1\right)} \cdot \cos \left(6.2831854820251465 \cdot u2\right) \]
6. Step-by-step derivation
7. Applied rewrites: 88.3%
  \[\leadsto \sqrt{\mathsf{fma}\left(0.5, u1, 1\right) \cdot u1} \cdot \cos \left(6.2831854820251465 \cdot u2\right) \]
if 0.998000026 < (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2))
1. Initial program 57.2%
  \[\sqrt{-\log \left(1 - u1\right)} \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \]
2. Taylor expanded in u2 around 0
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \left(1 + -2 \cdot \left({u2}^{2} \cdot {\pi}^{2}\right)\right) \]
3. Step-by-step derivation
4. Applied rewrites: 52.5%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \left(1 + -2 \cdot \left({u2}^{2} \cdot {\pi}^{2}\right)\right) \]
5. Step-by-step derivation
6. Applied rewrites: 52.5%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \mathsf{fma}\left(\left(\left(-2 \cdot u2\right) \cdot u2\right) \cdot \pi, \pi, 1\right) \]
7. Step-by-step derivation
8. Applied rewrites: 88.2%
  \[\leadsto \sqrt{-\mathsf{log1p}\left(-u1\right)} \cdot \mathsf{fma}\left(\left(\left(-2 \cdot u2\right) \cdot u2\right) \cdot \pi, \pi, 1\right) \]
Recombined 2 regimes into one program.
Add Preprocessing

Alternative 5: 94.7% accurate, 1.0× speedup?

\[\begin{array}{l} \mathbf{if}\;\left(2 \cdot \pi\right) \cdot u2 \leq 0.18000000715255737:\\ \;\;\;\;\sqrt{-\mathsf{log1p}\left(-u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right)\\ \mathbf{else}:\\ \;\;\;\;\sqrt{u1} \cdot \sin \left(\mathsf{fma}\left(-2, u2, 0.5\right) \cdot \pi\right)\\ \end{array} \]

; Alternative 5: two regimes selected by the angle (2 * PI) * u2, in binary32.
;   angle <= 0.18000000715255737 : sqrt(-log1p(-u1)) * fma(u2 * u2, -19.739208221435547, 1)
;   otherwise                    : sqrt(u1) * sin(fma(-2, u2, 0.5) * PI)
; -19.739208221435547 is the evaluated constant (PI * PI) * -2 from the cosine expansion;
; sqrt(u1) is the leading term of sqrt(-log(1 - u1)) around u1 = 0.
; cosTheta_i is constrained by :pre but is not used in the body expression.
(FPCore (cosTheta_i u1 u2)
  :precision binary32
  :pre (and (and (and (> cosTheta_i 0.9999) (<= cosTheta_i 1.0))
          (and (<= 2.328306437e-10 u1) (<= u1 1.0)))
     (and (<= 2.328306437e-10 u2) (<= u2 1.0)))
  (if (<= (* (* 2.0 PI) u2) 0.18000000715255737)
  (*
   (sqrt (- (log1p (- u1))))
   (fma (* u2 u2) -19.739208221435547 1.0))
  (* (sqrt u1) (sin (* (fma -2.0 u2 0.5) PI)))))

/*
 * Alternative 5 in C. When (2*pi) * u2 <= 0.18000000715255737 the result is
 * sqrt(-log1p(-u1)) * fma(u2 * u2, -19.739208221435547, 1); otherwise it is
 * sqrt(u1) * sin(fma(-2, u2, 0.5) * pi).
 * cosTheta_i is part of the signature but is not read by the body.
 */
float code(float cosTheta_i, float u1, float u2) {
	const float angle = (2.0f * ((float) M_PI)) * u2;
	if (angle <= 0.18000000715255737f) {
		const float cos_series = fmaf(u2 * u2, -19.739208221435547f, 1.0f);
		return sqrtf(-log1pf(-u1)) * cos_series;
	}
	const float phase = fmaf(-2.0f, u2, 0.5f) * ((float) M_PI);
	return sqrtf(u1) * sinf(phase);
}

# Alternative 5 in Julia. When (2*pi) * u2 <= 0.18000000715255737 the result is
# sqrt(-log1p(-u1)) * fma(u2 * u2, -19.739208221435547, 1); otherwise it is
# sqrt(u1) * sin(fma(-2, u2, 0.5) * pi). cosTheta_i is accepted but never read.
function code(cosTheta_i, u1, u2)
	angle = Float32(Float32(2.0f0 * Float32(pi)) * u2)
	if angle <= Float32(0.18000000715255737)
		cos_series = fma(Float32(u2 * u2), Float32(-19.739208221435547), 1.0f0)
		return Float32(sqrt(Float32(-log1p(Float32(-u1)))) * cos_series)
	end
	phase = Float32(fma(-2.0f0, u2, 0.5f0) * Float32(pi))
	return Float32(sqrt(u1) * sin(phase))
end

\begin{array}{l}
\mathbf{if}\;\left(2 \cdot \pi\right) \cdot u2 \leq 0.18000000715255737:\\
\;\;\;\;\sqrt{-\mathsf{log1p}\left(-u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right)\\

\mathbf{else}:\\
\;\;\;\;\sqrt{u1} \cdot \sin \left(\mathsf{fma}\left(-2, u2, 0.5\right) \cdot \pi\right)\\


\end{array}

Derivation

Split input into 2 regimes
if (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2) < 0.180000007
1. Initial program 57.2%
  \[\sqrt{-\log \left(1 - u1\right)} \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \]
2. Taylor expanded in u2 around 0
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \left(1 + -2 \cdot \left({u2}^{2} \cdot {\pi}^{2}\right)\right) \]
3. Step-by-step derivation
4. Applied rewrites: 52.5%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \left(1 + -2 \cdot \left({u2}^{2} \cdot {\pi}^{2}\right)\right) \]
5. Step-by-step derivation
6. Applied rewrites: 52.5%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, \left(\pi \cdot \pi\right) \cdot -2, 1\right) \]
7. Evaluated real constant: 52.5%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right) \]
8. Step-by-step derivation
9. Applied rewrites: 88.2%
  \[\leadsto \sqrt{-\mathsf{log1p}\left(-u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right) \]
if 0.180000007 < (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2)
1. Initial program 57.2%
  \[\sqrt{-\log \left(1 - u1\right)} \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \]
2. Taylor expanded in u1 around 0
  \[\leadsto \sqrt{u1} \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \]
3. Step-by-step derivation
4. Applied rewrites: 76.8%
  \[\leadsto \sqrt{u1} \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \]
5. Step-by-step derivation
6. Applied rewrites: 76.9%
  \[\leadsto \sqrt{u1} \cdot \sin \left(\mathsf{fma}\left(-2, u2, 0.5\right) \cdot \pi\right) \]
Recombined 2 regimes into one program.
Add Preprocessing

Alternative 6: 94.6% accurate, 0.6× speedup?

\[\begin{array}{l} \mathbf{if}\;\cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \leq 0.9850000143051147:\\ \;\;\;\;\sin \left(\mathsf{fma}\left(-2, u2 \cdot \pi, 1.5707963705062866\right)\right) \cdot \sqrt{u1}\\ \mathbf{else}:\\ \;\;\;\;\sqrt{-\mathsf{log1p}\left(-u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right)\\ \end{array} \]

; Alternative 6: two regimes selected by cos((2 * PI) * u2), in binary32.
;   cos(...) <= 0.9850000143051147 : sin(fma(-2, u2 * PI, 1.5707963705062866)) * sqrt(u1)
;   otherwise                      : sqrt(-log1p(-u1)) * fma(u2 * u2, -19.739208221435547, 1)
; 1.5707963705062866 is the evaluated constant 0.5 * PI and -19.739208221435547 is the
; evaluated constant (PI * PI) * -2.
; cosTheta_i is constrained by :pre but is not used in the body expression.
(FPCore (cosTheta_i u1 u2)
  :precision binary32
  :pre (and (and (and (> cosTheta_i 0.9999) (<= cosTheta_i 1.0))
          (and (<= 2.328306437e-10 u1) (<= u1 1.0)))
     (and (<= 2.328306437e-10 u2) (<= u2 1.0)))
  (if (<= (cos (* (* 2.0 PI) u2)) 0.9850000143051147)
  (* (sin (fma -2.0 (* u2 PI) 1.5707963705062866)) (sqrt u1))
  (*
   (sqrt (- (log1p (- u1))))
   (fma (* u2 u2) -19.739208221435547 1.0))))

/*
 * Alternative 6 in C. When cos((2*pi) * u2) <= 0.9850000143051147 the result is
 * sin(fma(-2, u2 * pi, 1.5707963705062866)) * sqrt(u1); otherwise it is
 * sqrt(-log1p(-u1)) * fma(u2 * u2, -19.739208221435547, 1).
 * cosTheta_i is part of the signature but is not read by the body.
 */
float code(float cosTheta_i, float u1, float u2) {
	if (cosf((2.0f * ((float) M_PI)) * u2) <= 0.9850000143051147f) {
		const float phase = fmaf(-2.0f, u2 * ((float) M_PI), 1.5707963705062866f);
		return sinf(phase) * sqrtf(u1);
	}
	const float cos_series = fmaf(u2 * u2, -19.739208221435547f, 1.0f);
	return sqrtf(-log1pf(-u1)) * cos_series;
}

# Alternative 6 in Julia. When cos((2*pi) * u2) <= 0.9850000143051147 the result is
# sin(fma(-2, u2 * pi, 1.5707963705062866)) * sqrt(u1); otherwise it is
# sqrt(-log1p(-u1)) * fma(u2 * u2, -19.739208221435547, 1).
# cosTheta_i is accepted but never read.
function code(cosTheta_i, u1, u2)
	pi32 = Float32(pi)
	if cos(Float32(Float32(2.0f0 * pi32) * u2)) <= Float32(0.9850000143051147)
		phase = fma(-2.0f0, Float32(u2 * pi32), Float32(1.5707963705062866))
		return Float32(sin(phase) * sqrt(u1))
	end
	cos_series = fma(Float32(u2 * u2), Float32(-19.739208221435547), 1.0f0)
	return Float32(sqrt(Float32(-log1p(Float32(-u1)))) * cos_series)
end

\begin{array}{l}
\mathbf{if}\;\cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \leq 0.9850000143051147:\\
\;\;\;\;\sin \left(\mathsf{fma}\left(-2, u2 \cdot \pi, 1.5707963705062866\right)\right) \cdot \sqrt{u1}\\

\mathbf{else}:\\
\;\;\;\;\sqrt{-\mathsf{log1p}\left(-u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right)\\


\end{array}

Derivation

Split input into 2 regimes
if (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2)) < 0.985000014
1. Initial program 57.2%
  \[\sqrt{-\log \left(1 - u1\right)} \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \]
2. Step-by-step derivation
3. Applied rewrites: 57.2%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \sin \left(\mathsf{fma}\left(-2 \cdot u2, \pi, 0.5 \cdot \pi\right)\right) \]
4. Taylor expanded in u1 around 0
  \[\leadsto \sin \left(-2 \cdot \left(u2 \cdot \pi\right) + \frac{1}{2} \cdot \pi\right) \cdot \sqrt{u1} \]
5. Step-by-step derivation
6. Applied rewrites: 76.8%
  \[\leadsto \sin \left(\mathsf{fma}\left(-2, u2 \cdot \pi, 0.5 \cdot \pi\right)\right) \cdot \sqrt{u1} \]
7. Evaluated real constant: 76.8%
  \[\leadsto \sin \left(\mathsf{fma}\left(-2, u2 \cdot \pi, 1.5707963705062866\right)\right) \cdot \sqrt{u1} \]
if 0.985000014 < (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2))
1. Initial program 57.2%
  \[\sqrt{-\log \left(1 - u1\right)} \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \]
2. Taylor expanded in u2 around 0
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \left(1 + -2 \cdot \left({u2}^{2} \cdot {\pi}^{2}\right)\right) \]
3. Step-by-step derivation
4. Applied rewrites: 52.5%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \left(1 + -2 \cdot \left({u2}^{2} \cdot {\pi}^{2}\right)\right) \]
5. Step-by-step derivation
6. Applied rewrites: 52.5%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, \left(\pi \cdot \pi\right) \cdot -2, 1\right) \]
7. Evaluated real constant: 52.5%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right) \]
8. Step-by-step derivation
9. Applied rewrites: 88.2%
  \[\leadsto \sqrt{-\mathsf{log1p}\left(-u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right) \]
Recombined 2 regimes into one program.
Add Preprocessing

Alternative 7: 94.6% accurate, 1.0× speedup?

\[\begin{array}{l} \mathbf{if}\;\left(2 \cdot \pi\right) \cdot u2 \leq 0.18000000715255737:\\ \;\;\;\;\sqrt{-\mathsf{log1p}\left(-u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right)\\ \mathbf{else}:\\ \;\;\;\;\sqrt{u1} \cdot \sin \left(\mathsf{fma}\left(6.2831854820251465, u2, 1.5707963705062866\right)\right)\\ \end{array} \]

; Alternative 7: two regimes selected by the angle (2 * PI) * u2, in binary32.
;   angle <= 0.18000000715255737 : sqrt(-log1p(-u1)) * fma(u2 * u2, -19.739208221435547, 1)
;   otherwise                    : sqrt(u1) * sin(fma(6.2831854820251465, u2, 1.5707963705062866))
; 6.2831854820251465 and 1.5707963705062866 are the evaluated constants for 2 * PI and 0.5 * PI.
; cosTheta_i is constrained by :pre but is not used in the body expression.
(FPCore (cosTheta_i u1 u2)
  :precision binary32
  :pre (and (and (and (> cosTheta_i 0.9999) (<= cosTheta_i 1.0))
          (and (<= 2.328306437e-10 u1) (<= u1 1.0)))
     (and (<= 2.328306437e-10 u2) (<= u2 1.0)))
  (if (<= (* (* 2.0 PI) u2) 0.18000000715255737)
  (*
   (sqrt (- (log1p (- u1))))
   (fma (* u2 u2) -19.739208221435547 1.0))
  (* (sqrt u1) (sin (fma 6.2831854820251465 u2 1.5707963705062866)))))

/*
 * Alternative 7 in C. When (2*pi) * u2 <= 0.18000000715255737 the result is
 * sqrt(-log1p(-u1)) * fma(u2 * u2, -19.739208221435547, 1); otherwise it is
 * sqrt(u1) * sin(fma(6.2831854820251465, u2, 1.5707963705062866)).
 * cosTheta_i is part of the signature but is not read by the body.
 */
float code(float cosTheta_i, float u1, float u2) {
	const float angle = (2.0f * ((float) M_PI)) * u2;
	if (angle <= 0.18000000715255737f) {
		const float cos_series = fmaf(u2 * u2, -19.739208221435547f, 1.0f);
		return sqrtf(-log1pf(-u1)) * cos_series;
	}
	const float phase = fmaf(6.2831854820251465f, u2, 1.5707963705062866f);
	return sqrtf(u1) * sinf(phase);
}

# Alternative 7 in Julia. When (2*pi) * u2 <= 0.18000000715255737 the result is
# sqrt(-log1p(-u1)) * fma(u2 * u2, -19.739208221435547, 1); otherwise it is
# sqrt(u1) * sin(fma(6.2831854820251465, u2, 1.5707963705062866)).
# cosTheta_i is accepted but never read.
function code(cosTheta_i, u1, u2)
	angle = Float32(Float32(2.0f0 * Float32(pi)) * u2)
	if angle <= Float32(0.18000000715255737)
		cos_series = fma(Float32(u2 * u2), Float32(-19.739208221435547), 1.0f0)
		return Float32(sqrt(Float32(-log1p(Float32(-u1)))) * cos_series)
	end
	phase = fma(Float32(6.2831854820251465), u2, Float32(1.5707963705062866))
	return Float32(sqrt(u1) * sin(phase))
end

\begin{array}{l}
\mathbf{if}\;\left(2 \cdot \pi\right) \cdot u2 \leq 0.18000000715255737:\\
\;\;\;\;\sqrt{-\mathsf{log1p}\left(-u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right)\\

\mathbf{else}:\\
\;\;\;\;\sqrt{u1} \cdot \sin \left(\mathsf{fma}\left(6.2831854820251465, u2, 1.5707963705062866\right)\right)\\


\end{array}

Derivation

Split input into 2 regimes
if (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2) < 0.180000007
1. Initial program 57.2%
  \[\sqrt{-\log \left(1 - u1\right)} \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \]
2. Taylor expanded in u2 around 0
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \left(1 + -2 \cdot \left({u2}^{2} \cdot {\pi}^{2}\right)\right) \]
3. Step-by-step derivation
4. Applied rewrites: 52.5%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \left(1 + -2 \cdot \left({u2}^{2} \cdot {\pi}^{2}\right)\right) \]
5. Step-by-step derivation
6. Applied rewrites: 52.5%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, \left(\pi \cdot \pi\right) \cdot -2, 1\right) \]
7. Evaluated real constant: 52.5%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right) \]
8. Step-by-step derivation
9. Applied rewrites: 88.2%
  \[\leadsto \sqrt{-\mathsf{log1p}\left(-u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right) \]
if 0.180000007 < (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2)
1. Initial program 57.2%
  \[\sqrt{-\log \left(1 - u1\right)} \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \]
2. Evaluated real constant: 57.2%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \cos \left(6.2831854820251465 \cdot u2\right) \]
3. Taylor expanded in u1 around 0
  \[\leadsto \sqrt{u1} \cdot \cos \left(6.2831854820251465 \cdot u2\right) \]
4. Step-by-step derivation
5. Applied rewrites: 76.8%
  \[\leadsto \sqrt{u1} \cdot \cos \left(6.2831854820251465 \cdot u2\right) \]
6. Step-by-step derivation
7. Applied rewrites: 76.8%
  \[\leadsto \sqrt{u1} \cdot \sin \left(\mathsf{fma}\left(6.2831854820251465, u2, 0.5 \cdot \pi\right)\right) \]
8. Evaluated real constant: 76.8%
  \[\leadsto \sqrt{u1} \cdot \sin \left(\mathsf{fma}\left(6.2831854820251465, u2, 1.5707963705062866\right)\right) \]
Recombined 2 regimes into one program.
Add Preprocessing

Alternative 8: 94.6% accurate, 1.1× speedup?

\[\begin{array}{l} \mathbf{if}\;\left(2 \cdot \pi\right) \cdot u2 \leq 0.18000000715255737:\\ \;\;\;\;\sqrt{-\mathsf{log1p}\left(-u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right)\\ \mathbf{else}:\\ \;\;\;\;\sqrt{u1} \cdot \cos \left(6.2831854820251465 \cdot u2\right)\\ \end{array} \]

; Alternative 8: two regimes selected by the angle (2 * PI) * u2, in binary32.
;   angle <= 0.18000000715255737 : sqrt(-log1p(-u1)) * fma(u2 * u2, -19.739208221435547, 1)
;   otherwise                    : sqrt(u1) * cos(6.2831854820251465 * u2)
; -19.739208221435547 is the evaluated constant (PI * PI) * -2 and 6.2831854820251465
; is the evaluated constant 2 * PI.
; cosTheta_i is constrained by :pre but is not used in the body expression.
(FPCore (cosTheta_i u1 u2)
  :precision binary32
  :pre (and (and (and (> cosTheta_i 0.9999) (<= cosTheta_i 1.0))
          (and (<= 2.328306437e-10 u1) (<= u1 1.0)))
     (and (<= 2.328306437e-10 u2) (<= u2 1.0)))
  (if (<= (* (* 2.0 PI) u2) 0.18000000715255737)
  (*
   (sqrt (- (log1p (- u1))))
   (fma (* u2 u2) -19.739208221435547 1.0))
  (* (sqrt u1) (cos (* 6.2831854820251465 u2)))))

/*
 * Alternative 8 in C. When (2*pi) * u2 <= 0.18000000715255737 the result is
 * sqrt(-log1p(-u1)) * fma(u2 * u2, -19.739208221435547, 1); otherwise it is
 * sqrt(u1) * cos(6.2831854820251465 * u2).
 * cosTheta_i is part of the signature but is not read by the body.
 */
float code(float cosTheta_i, float u1, float u2) {
	const float angle = (2.0f * ((float) M_PI)) * u2;
	if (angle <= 0.18000000715255737f) {
		const float cos_series = fmaf(u2 * u2, -19.739208221435547f, 1.0f);
		return sqrtf(-log1pf(-u1)) * cos_series;
	}
	return sqrtf(u1) * cosf(6.2831854820251465f * u2);
}

# Alternative 8 in Julia. When (2*pi) * u2 <= 0.18000000715255737 the result is
# sqrt(-log1p(-u1)) * fma(u2 * u2, -19.739208221435547, 1); otherwise it is
# sqrt(u1) * cos(6.2831854820251465 * u2). cosTheta_i is accepted but never read.
function code(cosTheta_i, u1, u2)
	angle = Float32(Float32(2.0f0 * Float32(pi)) * u2)
	if angle <= Float32(0.18000000715255737)
		cos_series = fma(Float32(u2 * u2), Float32(-19.739208221435547), 1.0f0)
		return Float32(sqrt(Float32(-log1p(Float32(-u1)))) * cos_series)
	end
	return Float32(sqrt(u1) * cos(Float32(Float32(6.2831854820251465) * u2)))
end

\begin{array}{l}
\mathbf{if}\;\left(2 \cdot \pi\right) \cdot u2 \leq 0.18000000715255737:\\
\;\;\;\;\sqrt{-\mathsf{log1p}\left(-u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right)\\

\mathbf{else}:\\
\;\;\;\;\sqrt{u1} \cdot \cos \left(6.2831854820251465 \cdot u2\right)\\


\end{array}

Derivation

Split input into 2 regimes
if (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2) < 0.180000007
1. Initial program 57.2%
  \[\sqrt{-\log \left(1 - u1\right)} \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \]
2. Taylor expanded in u2 around 0
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \left(1 + -2 \cdot \left({u2}^{2} \cdot {\pi}^{2}\right)\right) \]
3. Step-by-step derivation
4. Applied rewrites: 52.5%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \left(1 + -2 \cdot \left({u2}^{2} \cdot {\pi}^{2}\right)\right) \]
5. Step-by-step derivation
6. Applied rewrites: 52.5%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, \left(\pi \cdot \pi\right) \cdot -2, 1\right) \]
7. Evaluated real constant: 52.5%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right) \]
8. Step-by-step derivation
9. Applied rewrites: 88.2%
  \[\leadsto \sqrt{-\mathsf{log1p}\left(-u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right) \]
if 0.180000007 < (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2)
1. Initial program 57.2%
  \[\sqrt{-\log \left(1 - u1\right)} \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \]
2. Evaluated real constant: 57.2%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \cos \left(6.2831854820251465 \cdot u2\right) \]
3. Taylor expanded in u1 around 0
  \[\leadsto \sqrt{u1} \cdot \cos \left(6.2831854820251465 \cdot u2\right) \]
4. Step-by-step derivation
5. Applied rewrites: 76.8%
  \[\leadsto \sqrt{u1} \cdot \cos \left(6.2831854820251465 \cdot u2\right) \]
Recombined 2 regimes into one program.
Add Preprocessing

Alternative 9: 88.2% accurate, 2.1× speedup?

\[\sqrt{-\mathsf{log1p}\left(-u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right) \]

; Alternative 9: sqrt(-log1p(-u1)) * fma(u2 * u2, -19.739208221435547, 1), in binary32.
; The cosine is replaced for all inputs by its expansion around u2 = 0;
; -19.739208221435547 is the evaluated constant (PI * PI) * -2.
; cosTheta_i is constrained by :pre but is not used in the body expression.
(FPCore (cosTheta_i u1 u2)
  :precision binary32
  :pre (and (and (and (> cosTheta_i 0.9999) (<= cosTheta_i 1.0))
          (and (<= 2.328306437e-10 u1) (<= u1 1.0)))
     (and (<= 2.328306437e-10 u2) (<= u2 1.0)))
  (* (sqrt (- (log1p (- u1)))) (fma (* u2 u2) -19.739208221435547 1.0)))

float code(float cosTheta_i, float u1, float u2) {
	return sqrtf(-log1pf(-u1)) * fmaf((u2 * u2), -19.739208221435547f, 1.0f);
}

# Alternative 9 in Julia: sqrt(-log1p(-u1)) * fma(u2 * u2, -19.739208221435547, 1),
# rounded to Float32 after every operation. cosTheta_i is accepted but never read.
function code(cosTheta_i, u1, u2)
	radius = sqrt(Float32(-log1p(Float32(-u1))))
	cos_series = fma(Float32(u2 * u2), Float32(-19.739208221435547), 1.0f0)
	return Float32(radius * cos_series)
end

\sqrt{-\mathsf{log1p}\left(-u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right)

Derivation

Initial program 57.2%
\[\sqrt{-\log \left(1 - u1\right)} \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \]
Taylor expanded in u2 around 0
\[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \left(1 + -2 \cdot \left({u2}^{2} \cdot {\pi}^{2}\right)\right) \]
Step-by-step derivation
Applied rewrites: 52.5%
\[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \left(1 + -2 \cdot \left({u2}^{2} \cdot {\pi}^{2}\right)\right) \]
Step-by-step derivation
Applied rewrites: 52.5%
\[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, \left(\pi \cdot \pi\right) \cdot -2, 1\right) \]
Evaluated real constant: 52.5%
\[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right) \]
Step-by-step derivation
Applied rewrites: 88.2%
\[\leadsto \sqrt{-\mathsf{log1p}\left(-u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right) \]
Add Preprocessing

Alternative 10: 86.8% accurate, 0.7× speedup?

\[\begin{array}{l} t_0 := \sqrt{-\log \left(1 - u1\right)}\\ t_1 := \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right)\\ \mathbf{if}\;t\_0 \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \leq 0.04800000041723251:\\ \;\;\;\;\sqrt{u1 \cdot \left(1 + 0.5 \cdot u1\right)} \cdot t\_1\\ \mathbf{else}:\\ \;\;\;\;t\_0 \cdot t\_1\\ \end{array} \]

; Alternative 10: two regimes selected by the value of the initial program,
; t_0 * cos((2 * PI) * u2), in binary32, where
;   t_0 = sqrt(-log(1 - u1)) and t_1 = fma(u2 * u2, -19.739208221435547, 1).
;   value <= 0.04800000041723251 : sqrt(u1 * (1 + 0.5 * u1)) * t_1
;   otherwise                    : t_0 * t_1
; cosTheta_i is constrained by :pre but is not used in the body expression.
(FPCore (cosTheta_i u1 u2)
  :precision binary32
  :pre (and (and (and (> cosTheta_i 0.9999) (<= cosTheta_i 1.0))
          (and (<= 2.328306437e-10 u1) (<= u1 1.0)))
     (and (<= 2.328306437e-10 u2) (<= u2 1.0)))
  (let* ((t_0 (sqrt (- (log (- 1.0 u1)))))
       (t_1 (fma (* u2 u2) -19.739208221435547 1.0)))
  (if (<= (* t_0 (cos (* (* 2.0 PI) u2))) 0.04800000041723251)
    (* (sqrt (* u1 (+ 1.0 (* 0.5 u1)))) t_1)
    (* t_0 t_1))))

/*
 * Alternative 10 in C. The cosine factor is always fma(u2 * u2, -19.739208221435547, 1).
 * The radius is sqrt(u1 * (1 + 0.5 * u1)) when the initial program's value,
 * sqrt(-log(1 - u1)) * cos((2*pi) * u2), is <= 0.04800000041723251, and
 * sqrt(-log(1 - u1)) otherwise.
 * cosTheta_i is part of the signature but is not read by the body.
 */
float code(float cosTheta_i, float u1, float u2) {
	const float exact_radius = sqrtf(-logf(1.0f - u1));
	const float cos_series = fmaf(u2 * u2, -19.739208221435547f, 1.0f);
	const float reference = exact_radius * cosf((2.0f * ((float) M_PI)) * u2);
	const float radius = (reference <= 0.04800000041723251f)
		? sqrtf(u1 * (1.0f + (0.5f * u1)))
		: exact_radius;
	return radius * cos_series;
}

# Alternative 10 in Julia. The cosine factor is always fma(u2 * u2, -19.739208221435547, 1).
# The radius is sqrt(u1 * (1 + 0.5 * u1)) when the initial program's value,
# sqrt(-log(1 - u1)) * cos((2*pi) * u2), is <= 0.04800000041723251, and
# sqrt(-log(1 - u1)) otherwise. cosTheta_i is accepted but never read.
function code(cosTheta_i, u1, u2)
	exact_radius = sqrt(Float32(-log(Float32(1.0f0 - u1))))
	cos_series = fma(Float32(u2 * u2), Float32(-19.739208221435547), 1.0f0)
	reference = Float32(exact_radius * cos(Float32(Float32(2.0f0 * Float32(pi)) * u2)))
	radius = if reference <= Float32(0.04800000041723251)
		sqrt(Float32(u1 * Float32(1.0f0 + Float32(0.5f0 * u1))))
	else
		exact_radius
	end
	return Float32(radius * cos_series)
end

\begin{array}{l}
t_0 := \sqrt{-\log \left(1 - u1\right)}\\
t_1 := \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right)\\
\mathbf{if}\;t\_0 \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \leq 0.04800000041723251:\\
\;\;\;\;\sqrt{u1 \cdot \left(1 + 0.5 \cdot u1\right)} \cdot t\_1\\

\mathbf{else}:\\
\;\;\;\;t\_0 \cdot t\_1\\


\end{array}

Derivation

Split input into 2 regimes
if (*.f32 (sqrt.f32 (neg.f32 (log.f32 (-.f32 #s(literal 1 binary32) u1)))) (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2))) < 0.0480000004
1. Initial program 57.2%
  \[\sqrt{-\log \left(1 - u1\right)} \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \]
2. Taylor expanded in u2 around 0
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \left(1 + -2 \cdot \left({u2}^{2} \cdot {\pi}^{2}\right)\right) \]
3. Step-by-step derivation
4. Applied rewrites: 52.5%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \left(1 + -2 \cdot \left({u2}^{2} \cdot {\pi}^{2}\right)\right) \]
5. Step-by-step derivation
6. Applied rewrites: 52.5%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, \left(\pi \cdot \pi\right) \cdot -2, 1\right) \]
7. Evaluated real constant: 52.5%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right) \]
8. Taylor expanded in u1 around 0
  \[\leadsto \sqrt{u1 \cdot \left(1 + \frac{1}{2} \cdot u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right) \]
9. Step-by-step derivation
10. Applied rewrites: 79.2%
  \[\leadsto \sqrt{u1 \cdot \left(1 + 0.5 \cdot u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right) \]
if 0.0480000004 < (*.f32 (sqrt.f32 (neg.f32 (log.f32 (-.f32 #s(literal 1 binary32) u1)))) (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2)))
1. Initial program 57.2%
  \[\sqrt{-\log \left(1 - u1\right)} \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \]
2. Taylor expanded in u2 around 0
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \left(1 + -2 \cdot \left({u2}^{2} \cdot {\pi}^{2}\right)\right) \]
3. Step-by-step derivation
4. Applied rewrites: 52.5%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \left(1 + -2 \cdot \left({u2}^{2} \cdot {\pi}^{2}\right)\right) \]
5. Step-by-step derivation
6. Applied rewrites: 52.5%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, \left(\pi \cdot \pi\right) \cdot -2, 1\right) \]
7. Evaluated real constant: 52.5%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right) \]
Recombined 2 regimes into one program.
Add Preprocessing

Alternative 11: 86.8% accurate, 0.7× speedup?

\[\begin{array}{l} t_0 := \sqrt{-\log \left(1 - u1\right)}\\ \mathbf{if}\;t\_0 \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \leq 0.04800000041723251:\\ \;\;\;\;\sqrt{u1 \cdot \left(1 + 0.5 \cdot u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right)\\ \mathbf{else}:\\ \;\;\;\;t\_0 \cdot \mathsf{fma}\left(u2, u2 \cdot -19.739208221435547, 1\right)\\ \end{array} \]

; Alternative 11: two regimes selected by the value of the initial program,
; t_0 * cos((2 * PI) * u2), in binary32, where t_0 = sqrt(-log(1 - u1)).
;   value <= 0.04800000041723251 : sqrt(u1 * (1 + 0.5 * u1)) * fma(u2 * u2, -19.739208221435547, 1)
;   otherwise                    : t_0 * fma(u2, u2 * -19.739208221435547, 1)
; The two branches associate the u2 * u2 * -19.739208221435547 product differently.
; cosTheta_i is constrained by :pre but is not used in the body expression.
(FPCore (cosTheta_i u1 u2)
  :precision binary32
  :pre (and (and (and (> cosTheta_i 0.9999) (<= cosTheta_i 1.0))
          (and (<= 2.328306437e-10 u1) (<= u1 1.0)))
     (and (<= 2.328306437e-10 u2) (<= u2 1.0)))
  (let* ((t_0 (sqrt (- (log (- 1.0 u1))))))
  (if (<= (* t_0 (cos (* (* 2.0 PI) u2))) 0.04800000041723251)
    (*
     (sqrt (* u1 (+ 1.0 (* 0.5 u1))))
     (fma (* u2 u2) -19.739208221435547 1.0))
    (* t_0 (fma u2 (* u2 -19.739208221435547) 1.0)))))

/*
 * Alternative 11: two-regime approximation of
 *   sqrt(-log(1 - u1)) * cos(2*pi*u2)
 * in single precision.
 *
 * The original expression is evaluated once as a selector. When it is at
 * most 0.048 the radial factor uses the expansion sqrt(u1 * (1 + 0.5*u1));
 * otherwise the directly computed sqrt(-log(1 - u1)) is kept. Both regimes
 * replace the cosine by 1 - 2*pi^2*u2^2 (the constant -19.739208... is
 * -2*pi^2). cosTheta_i is accepted but not read.
 */
float code(float cosTheta_i, float u1, float u2) {
	const float radius = sqrtf(-logf(1.0f - u1));
	const float selector = radius * cosf((2.0f * ((float) M_PI)) * u2);

	if (selector <= 0.04800000041723251f) {
		/* small-result regime: series form of the radial factor */
		return sqrtf(u1 * (1.0f + (0.5f * u1))) * fmaf(u2 * u2, -19.739208221435547f, 1.0f);
	}
	return radius * fmaf(u2, u2 * -19.739208221435547f, 1.0f);
}

# Alternative 11: two-regime Float32 approximation of sqrt(-log(1 - u1)) * cos(2*pi*u2).
# The original expression acts as the selector: at or below 0.048 the radial factor
# is sqrt(u1 * (1 + 0.5*u1)); above it the directly computed sqrt(-log(1 - u1)) is kept.
# Both regimes use 1 - 2*pi^2*u2^2 in place of the cosine. cosTheta_i is not read.
function code(cosTheta_i, u1, u2)
	radius = sqrt(Float32(-log(Float32(Float32(1.0) - u1))))
	angle = Float32(Float32(Float32(2.0) * Float32(pi)) * u2)
	selector = Float32(radius * cos(angle))
	if selector <= Float32(0.04800000041723251)
		return Float32(sqrt(Float32(u1 * Float32(Float32(1.0) + Float32(Float32(0.5) * u1)))) * fma(Float32(u2 * u2), Float32(-19.739208221435547), Float32(1.0)))
	end
	return Float32(radius * fma(u2, Float32(u2 * Float32(-19.739208221435547)), Float32(1.0)))
end

\begin{array}{l}
t_0 := \sqrt{-\log \left(1 - u1\right)}\\
\mathbf{if}\;t\_0 \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \leq 0.04800000041723251:\\
\;\;\;\;\sqrt{u1 \cdot \left(1 + 0.5 \cdot u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right)\\

\mathbf{else}:\\
\;\;\;\;t\_0 \cdot \mathsf{fma}\left(u2, u2 \cdot -19.739208221435547, 1\right)\\


\end{array}

Derivation

Split input into 2 regimes
if (*.f32 (sqrt.f32 (neg.f32 (log.f32 (-.f32 #s(literal 1 binary32) u1)))) (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2))) < 0.0480000004
1. Initial program 57.2%
  \[\sqrt{-\log \left(1 - u1\right)} \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \]
2. Taylor expanded in u2 around 0
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \left(1 + -2 \cdot \left({u2}^{2} \cdot {\pi}^{2}\right)\right) \]
3. Step-by-step derivation
4. Applied rewrites52.5%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \left(1 + -2 \cdot \left({u2}^{2} \cdot {\pi}^{2}\right)\right) \]
5. Step-by-step derivation
6. Applied rewrites52.5%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, \left(\pi \cdot \pi\right) \cdot -2, 1\right) \]
7. Evaluated real constant52.5%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right) \]
8. Taylor expanded in u1 around 0
  \[\leadsto \sqrt{u1 \cdot \left(1 + \frac{1}{2} \cdot u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right) \]
9. Step-by-step derivation
10. Applied rewrites79.2%
  \[\leadsto \sqrt{u1 \cdot \left(1 + 0.5 \cdot u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right) \]
if 0.0480000004 < (*.f32 (sqrt.f32 (neg.f32 (log.f32 (-.f32 #s(literal 1 binary32) u1)))) (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2)))
1. Initial program 57.2%
  \[\sqrt{-\log \left(1 - u1\right)} \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \]
2. Taylor expanded in u2 around 0
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \left(1 + -2 \cdot \left({u2}^{2} \cdot {\pi}^{2}\right)\right) \]
3. Step-by-step derivation
4. Applied rewrites52.5%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \left(1 + -2 \cdot \left({u2}^{2} \cdot {\pi}^{2}\right)\right) \]
5. Evaluated real constant52.5%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \left(1 + -2 \cdot \left({u2}^{2} \cdot 9.869604110717773\right)\right) \]
6. Step-by-step derivation
7. Applied rewrites52.5%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \left(1 + -19.739208221435547 \cdot \left(u2 \cdot u2\right)\right) \]
8. Step-by-step derivation
9. Applied rewrites52.5%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \mathsf{fma}\left(u2, u2 \cdot -19.739208221435547, 1\right) \]
Recombined 2 regimes into one program.
Add Preprocessing

Alternative 12: 85.8% accurate, 0.7× speedup?

\[\begin{array}{l} \mathbf{if}\;\sqrt{-\log \left(1 - u1\right)} \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \leq 0.05000000074505806:\\ \;\;\;\;\sqrt{u1 \cdot \left(1 + 0.5 \cdot u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right)\\ \mathbf{else}:\\ \;\;\;\;\sqrt{-\mathsf{log1p}\left(-u1\right)}\\ \end{array} \]

; Alternative 12: two-regime binary32 variant of sqrt(-log(1 - u1)) * cos(2*PI*u2).
; The regime is selected by comparing the original expression with 0.05000000074505806.
;   <= threshold: sqrt(u1*(1 + 0.5*u1)) * (1 - 2*PI^2*u2^2), with -19.739208221435547 = (PI*PI)*-2.
;   otherwise:    sqrt(-log1p(-u1)); the u2-dependent factor is dropped entirely.
; cosTheta_i appears only in the precondition.
(FPCore (cosTheta_i u1 u2)
  :precision binary32
  :pre (and (and (and (> cosTheta_i 0.9999) (<= cosTheta_i 1.0))
          (and (<= 2.328306437e-10 u1) (<= u1 1.0)))
     (and (<= 2.328306437e-10 u2) (<= u2 1.0)))
  (if (<=
     (* (sqrt (- (log (- 1.0 u1)))) (cos (* (* 2.0 PI) u2)))
     0.05000000074505806)
  (*
   (sqrt (* u1 (+ 1.0 (* 0.5 u1))))
   (fma (* u2 u2) -19.739208221435547 1.0))
  (sqrt (- (log1p (- u1))))))

/*
 * Alternative 12: two-regime approximation of
 *   sqrt(-log(1 - u1)) * cos(2*pi*u2)
 * in single precision.
 *
 * The original expression is the selector. At or below 0.05 the result is
 * sqrt(u1 * (1 + 0.5*u1)) * (1 - 2*pi^2*u2^2); above it the result is
 * sqrt(-log1p(-u1)) with no u2 dependence. cosTheta_i is not read.
 */
float code(float cosTheta_i, float u1, float u2) {
	const float selector = sqrtf(-logf(1.0f - u1)) * cosf((2.0f * ((float) M_PI)) * u2);

	return (selector <= 0.05000000074505806f)
		? sqrtf(u1 * (1.0f + (0.5f * u1))) * fmaf(u2 * u2, -19.739208221435547f, 1.0f)
		: sqrtf(-log1pf(-u1));
}

# Alternative 12: two-regime Float32 approximation of sqrt(-log(1 - u1)) * cos(2*pi*u2).
# Selector is the original expression. At or below 0.05 the result is
# sqrt(u1 * (1 + 0.5*u1)) * (1 - 2*pi^2*u2^2); above it, sqrt(-log1p(-u1)).
# cosTheta_i is not read.
function code(cosTheta_i, u1, u2)
	radius = sqrt(Float32(-log(Float32(Float32(1.0) - u1))))
	angle = Float32(Float32(Float32(2.0) * Float32(pi)) * u2)
	selector = Float32(radius * cos(angle))
	if selector <= Float32(0.05000000074505806)
		return Float32(sqrt(Float32(u1 * Float32(Float32(1.0) + Float32(Float32(0.5) * u1)))) * fma(Float32(u2 * u2), Float32(-19.739208221435547), Float32(1.0)))
	end
	return sqrt(Float32(-log1p(Float32(-u1))))
end

\begin{array}{l}
\mathbf{if}\;\sqrt{-\log \left(1 - u1\right)} \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \leq 0.05000000074505806:\\
\;\;\;\;\sqrt{u1 \cdot \left(1 + 0.5 \cdot u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right)\\

\mathbf{else}:\\
\;\;\;\;\sqrt{-\mathsf{log1p}\left(-u1\right)}\\


\end{array}

Derivation

Split input into 2 regimes
if (*.f32 (sqrt.f32 (neg.f32 (log.f32 (-.f32 #s(literal 1 binary32) u1)))) (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2))) < 0.0500000007
1. Initial program 57.2%
  \[\sqrt{-\log \left(1 - u1\right)} \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \]
2. Taylor expanded in u2 around 0
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \left(1 + -2 \cdot \left({u2}^{2} \cdot {\pi}^{2}\right)\right) \]
3. Step-by-step derivation
4. Applied rewrites52.5%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \left(1 + -2 \cdot \left({u2}^{2} \cdot {\pi}^{2}\right)\right) \]
5. Step-by-step derivation
6. Applied rewrites52.5%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, \left(\pi \cdot \pi\right) \cdot -2, 1\right) \]
7. Evaluated real constant52.5%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right) \]
8. Taylor expanded in u1 around 0
  \[\leadsto \sqrt{u1 \cdot \left(1 + \frac{1}{2} \cdot u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right) \]
9. Step-by-step derivation
10. Applied rewrites79.2%
  \[\leadsto \sqrt{u1 \cdot \left(1 + 0.5 \cdot u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right) \]
if 0.0500000007 < (*.f32 (sqrt.f32 (neg.f32 (log.f32 (-.f32 #s(literal 1 binary32) u1)))) (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2)))
1. Initial program 57.2%
  \[\sqrt{-\log \left(1 - u1\right)} \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \]
2. Taylor expanded in u2 around 0
  \[\leadsto \sqrt{\mathsf{neg}\left(\log \left(1 - u1\right)\right)} \]
3. Step-by-step derivation
4. Applied rewrites49.1%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \]
5. Step-by-step derivation
6. Applied rewrites79.8%
  \[\leadsto \sqrt{-\mathsf{log1p}\left(-u1\right)} \]
Recombined 2 regimes into one program.
Add Preprocessing

Alternative 13: 83.4% accurate, 0.8× speedup?

\[\begin{array}{l} \mathbf{if}\;\sqrt{-\log \left(1 - u1\right)} \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \leq 0.0012499999720603228:\\ \;\;\;\;\sqrt{u1} \cdot \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right)\\ \mathbf{else}:\\ \;\;\;\;\sqrt{-\mathsf{log1p}\left(-u1\right)}\\ \end{array} \]

; Alternative 13: two-regime binary32 variant of sqrt(-log(1 - u1)) * cos(2*PI*u2).
; The regime is selected by comparing the original expression with 0.0012499999720603228.
;   <= threshold: sqrt(u1) * (1 - 2*PI^2*u2^2), with -19.739208221435547 = (PI*PI)*-2.
;   otherwise:    sqrt(-log1p(-u1)); the u2-dependent factor is dropped entirely.
; cosTheta_i appears only in the precondition.
(FPCore (cosTheta_i u1 u2)
  :precision binary32
  :pre (and (and (and (> cosTheta_i 0.9999) (<= cosTheta_i 1.0))
          (and (<= 2.328306437e-10 u1) (<= u1 1.0)))
     (and (<= 2.328306437e-10 u2) (<= u2 1.0)))
  (if (<=
     (* (sqrt (- (log (- 1.0 u1)))) (cos (* (* 2.0 PI) u2)))
     0.0012499999720603228)
  (* (sqrt u1) (fma (* u2 u2) -19.739208221435547 1.0))
  (sqrt (- (log1p (- u1))))))

/*
 * Alternative 13: two-regime approximation of
 *   sqrt(-log(1 - u1)) * cos(2*pi*u2)
 * in single precision.
 *
 * The original expression is the selector. At or below 0.00125 the result
 * is sqrt(u1) * (1 - 2*pi^2*u2^2); above it the result is
 * sqrt(-log1p(-u1)) with no u2 dependence. cosTheta_i is not read.
 */
float code(float cosTheta_i, float u1, float u2) {
	const float selector = sqrtf(-logf(1.0f - u1)) * cosf((2.0f * ((float) M_PI)) * u2);

	if (selector <= 0.0012499999720603228f) {
		return sqrtf(u1) * fmaf(u2 * u2, -19.739208221435547f, 1.0f);
	}
	return sqrtf(-log1pf(-u1));
}

# Alternative 13: two-regime Float32 approximation of sqrt(-log(1 - u1)) * cos(2*pi*u2).
# Selector is the original expression. At or below 0.00125 the result is
# sqrt(u1) * (1 - 2*pi^2*u2^2); above it, sqrt(-log1p(-u1)). cosTheta_i is not read.
function code(cosTheta_i, u1, u2)
	radius = sqrt(Float32(-log(Float32(Float32(1.0) - u1))))
	angle = Float32(Float32(Float32(2.0) * Float32(pi)) * u2)
	selector = Float32(radius * cos(angle))
	if selector <= Float32(0.0012499999720603228)
		return Float32(sqrt(u1) * fma(Float32(u2 * u2), Float32(-19.739208221435547), Float32(1.0)))
	end
	return sqrt(Float32(-log1p(Float32(-u1))))
end

\begin{array}{l}
\mathbf{if}\;\sqrt{-\log \left(1 - u1\right)} \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \leq 0.0012499999720603228:\\
\;\;\;\;\sqrt{u1} \cdot \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right)\\

\mathbf{else}:\\
\;\;\;\;\sqrt{-\mathsf{log1p}\left(-u1\right)}\\


\end{array}

Derivation

Split input into 2 regimes
if (*.f32 (sqrt.f32 (neg.f32 (log.f32 (-.f32 #s(literal 1 binary32) u1)))) (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2))) < 0.00124999997
1. Initial program 57.2%
  \[\sqrt{-\log \left(1 - u1\right)} \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \]
2. Taylor expanded in u2 around 0
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \left(1 + -2 \cdot \left({u2}^{2} \cdot {\pi}^{2}\right)\right) \]
3. Step-by-step derivation
4. Applied rewrites52.5%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \left(1 + -2 \cdot \left({u2}^{2} \cdot {\pi}^{2}\right)\right) \]
5. Step-by-step derivation
6. Applied rewrites52.5%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, \left(\pi \cdot \pi\right) \cdot -2, 1\right) \]
7. Evaluated real constant52.5%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right) \]
8. Taylor expanded in u1 around 0
  \[\leadsto \sqrt{u1} \cdot \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right) \]
9. Step-by-step derivation
10. Applied rewrites69.7%
  \[\leadsto \sqrt{u1} \cdot \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right) \]
if 0.00124999997 < (*.f32 (sqrt.f32 (neg.f32 (log.f32 (-.f32 #s(literal 1 binary32) u1)))) (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2)))
1. Initial program 57.2%
  \[\sqrt{-\log \left(1 - u1\right)} \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \]
2. Taylor expanded in u2 around 0
  \[\leadsto \sqrt{\mathsf{neg}\left(\log \left(1 - u1\right)\right)} \]
3. Step-by-step derivation
4. Applied rewrites49.1%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \]
5. Step-by-step derivation
6. Applied rewrites79.8%
  \[\leadsto \sqrt{-\mathsf{log1p}\left(-u1\right)} \]
Recombined 2 regimes into one program.
Add Preprocessing

Alternative 14: 79.6% accurate, 0.8× speedup?

\[\begin{array}{l} t_0 := \sqrt{-\log \left(1 - u1\right)}\\ \mathbf{if}\;t\_0 \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \leq 0.008999999612569809:\\ \;\;\;\;\sqrt{u1} \cdot \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right)\\ \mathbf{else}:\\ \;\;\;\;t\_0\\ \end{array} \]

; Alternative 14: two-regime binary32 variant of sqrt(-log(1 - u1)) * cos(2*PI*u2).
; The regime is selected by comparing the original expression with 0.008999999612569809.
;   <= threshold: sqrt(u1) * (1 - 2*PI^2*u2^2), with -19.739208221435547 = (PI*PI)*-2.
;   otherwise:    t_0 = sqrt(-log(1 - u1)); the u2-dependent factor is dropped entirely.
; cosTheta_i appears only in the precondition.
(FPCore (cosTheta_i u1 u2)
  :precision binary32
  :pre (and (and (and (> cosTheta_i 0.9999) (<= cosTheta_i 1.0))
          (and (<= 2.328306437e-10 u1) (<= u1 1.0)))
     (and (<= 2.328306437e-10 u2) (<= u2 1.0)))
  (let* ((t_0 (sqrt (- (log (- 1.0 u1))))))
  (if (<= (* t_0 (cos (* (* 2.0 PI) u2))) 0.008999999612569809)
    (* (sqrt u1) (fma (* u2 u2) -19.739208221435547 1.0))
    t_0)))

/*
 * Alternative 14: two-regime approximation of
 *   sqrt(-log(1 - u1)) * cos(2*pi*u2)
 * in single precision.
 *
 * The original expression is the selector. At or below 0.009 the result is
 * sqrt(u1) * (1 - 2*pi^2*u2^2); above it the directly computed
 * sqrt(-log(1 - u1)) is returned on its own. cosTheta_i is not read.
 */
float code(float cosTheta_i, float u1, float u2) {
	const float radius = sqrtf(-logf(1.0f - u1));
	const float selector = radius * cosf((2.0f * ((float) M_PI)) * u2);

	return (selector <= 0.008999999612569809f)
		? sqrtf(u1) * fmaf(u2 * u2, -19.739208221435547f, 1.0f)
		: radius;
}

# Alternative 14: two-regime Float32 approximation of sqrt(-log(1 - u1)) * cos(2*pi*u2).
# Selector is the original expression. At or below 0.009 the result is
# sqrt(u1) * (1 - 2*pi^2*u2^2); above it, sqrt(-log(1 - u1)) alone. cosTheta_i is not read.
function code(cosTheta_i, u1, u2)
	radius = sqrt(Float32(-log(Float32(Float32(1.0) - u1))))
	angle = Float32(Float32(Float32(2.0) * Float32(pi)) * u2)
	selector = Float32(radius * cos(angle))
	if selector <= Float32(0.008999999612569809)
		return Float32(sqrt(u1) * fma(Float32(u2 * u2), Float32(-19.739208221435547), Float32(1.0)))
	end
	return radius
end

\begin{array}{l}
t_0 := \sqrt{-\log \left(1 - u1\right)}\\
\mathbf{if}\;t\_0 \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \leq 0.008999999612569809:\\
\;\;\;\;\sqrt{u1} \cdot \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right)\\

\mathbf{else}:\\
\;\;\;\;t\_0\\


\end{array}

Derivation

Split input into 2 regimes
if (*.f32 (sqrt.f32 (neg.f32 (log.f32 (-.f32 #s(literal 1 binary32) u1)))) (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2))) < 0.00899999961
1. Initial program 57.2%
  \[\sqrt{-\log \left(1 - u1\right)} \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \]
2. Taylor expanded in u2 around 0
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \left(1 + -2 \cdot \left({u2}^{2} \cdot {\pi}^{2}\right)\right) \]
3. Step-by-step derivation
4. Applied rewrites52.5%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \left(1 + -2 \cdot \left({u2}^{2} \cdot {\pi}^{2}\right)\right) \]
5. Step-by-step derivation
6. Applied rewrites52.5%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, \left(\pi \cdot \pi\right) \cdot -2, 1\right) \]
7. Evaluated real constant52.5%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \cdot \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right) \]
8. Taylor expanded in u1 around 0
  \[\leadsto \sqrt{u1} \cdot \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right) \]
9. Step-by-step derivation
10. Applied rewrites69.7%
  \[\leadsto \sqrt{u1} \cdot \mathsf{fma}\left(u2 \cdot u2, -19.739208221435547, 1\right) \]
if 0.00899999961 < (*.f32 (sqrt.f32 (neg.f32 (log.f32 (-.f32 #s(literal 1 binary32) u1)))) (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2)))
1. Initial program 57.2%
  \[\sqrt{-\log \left(1 - u1\right)} \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \]
2. Taylor expanded in u2 around 0
  \[\leadsto \sqrt{\mathsf{neg}\left(\log \left(1 - u1\right)\right)} \]
3. Step-by-step derivation
4. Applied rewrites49.1%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \]
Recombined 2 regimes into one program.
Add Preprocessing

Alternative 15: 78.8% accurate, 0.8× speedup?

\[\begin{array}{l} t_0 := \sqrt{-\log \left(1 - u1\right)}\\ \mathbf{if}\;t\_0 \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \leq 0.05000000074505806:\\ \;\;\;\;\sqrt{\mathsf{fma}\left(0.5, u1, 1\right) \cdot u1}\\ \mathbf{else}:\\ \;\;\;\;t\_0\\ \end{array} \]

; Alternative 15: two-regime binary32 variant of sqrt(-log(1 - u1)) * cos(2*PI*u2).
; The regime is selected by comparing the original expression with 0.05000000074505806.
;   <= threshold: sqrt(fma(0.5, u1, 1) * u1), the expansion of sqrt(-log(1 - u1)) in u1 around 0.
;   otherwise:    t_0 = sqrt(-log(1 - u1)).
; Neither result branch depends on u2; u2 only influences which regime is taken.
; cosTheta_i appears only in the precondition.
(FPCore (cosTheta_i u1 u2)
  :precision binary32
  :pre (and (and (and (> cosTheta_i 0.9999) (<= cosTheta_i 1.0))
          (and (<= 2.328306437e-10 u1) (<= u1 1.0)))
     (and (<= 2.328306437e-10 u2) (<= u2 1.0)))
  (let* ((t_0 (sqrt (- (log (- 1.0 u1))))))
  (if (<= (* t_0 (cos (* (* 2.0 PI) u2))) 0.05000000074505806)
    (sqrt (* (fma 0.5 u1 1.0) u1))
    t_0)))

/*
 * Alternative 15: two-regime approximation of
 *   sqrt(-log(1 - u1)) * cos(2*pi*u2)
 * in single precision.
 *
 * The original expression is the selector. At or below 0.05 the result is
 * sqrt(fma(0.5, u1, 1) * u1); above it the directly computed
 * sqrt(-log(1 - u1)) is returned. u2 only affects which regime is chosen;
 * cosTheta_i is not read.
 */
float code(float cosTheta_i, float u1, float u2) {
	const float radius = sqrtf(-logf(1.0f - u1));
	const float selector = radius * cosf((2.0f * ((float) M_PI)) * u2);

	if (selector <= 0.05000000074505806f) {
		return sqrtf(fmaf(0.5f, u1, 1.0f) * u1);
	}
	return radius;
}

# Alternative 15: two-regime Float32 approximation of sqrt(-log(1 - u1)) * cos(2*pi*u2).
# Selector is the original expression. At or below 0.05 the result is
# sqrt(fma(0.5, u1, 1) * u1); above it, sqrt(-log(1 - u1)).
# u2 only affects the regime choice; cosTheta_i is not read.
function code(cosTheta_i, u1, u2)
	radius = sqrt(Float32(-log(Float32(Float32(1.0) - u1))))
	angle = Float32(Float32(Float32(2.0) * Float32(pi)) * u2)
	selector = Float32(radius * cos(angle))
	if selector <= Float32(0.05000000074505806)
		return sqrt(Float32(fma(Float32(0.5), u1, Float32(1.0)) * u1))
	end
	return radius
end

\begin{array}{l}
t_0 := \sqrt{-\log \left(1 - u1\right)}\\
\mathbf{if}\;t\_0 \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \leq 0.05000000074505806:\\
\;\;\;\;\sqrt{\mathsf{fma}\left(0.5, u1, 1\right) \cdot u1}\\

\mathbf{else}:\\
\;\;\;\;t\_0\\


\end{array}

Derivation

Split input into 2 regimes
if (*.f32 (sqrt.f32 (neg.f32 (log.f32 (-.f32 #s(literal 1 binary32) u1)))) (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2))) < 0.0500000007
1. Initial program 57.2%
  \[\sqrt{-\log \left(1 - u1\right)} \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \]
2. Taylor expanded in u2 around 0
  \[\leadsto \sqrt{\mathsf{neg}\left(\log \left(1 - u1\right)\right)} \]
3. Step-by-step derivation
4. Applied rewrites49.1%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \]
5. Taylor expanded in u1 around 0
  \[\leadsto \sqrt{u1 \cdot \left(1 + \frac{1}{2} \cdot u1\right)} \]
6. Step-by-step derivation
7. Applied rewrites72.6%
  \[\leadsto \sqrt{u1 \cdot \left(1 + 0.5 \cdot u1\right)} \]
8. Step-by-step derivation
9. Applied rewrites72.6%
  \[\leadsto \sqrt{\mathsf{fma}\left(0.5, u1, 1\right) \cdot u1} \]
if 0.0500000007 < (*.f32 (sqrt.f32 (neg.f32 (log.f32 (-.f32 #s(literal 1 binary32) u1)))) (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2)))
1. Initial program 57.2%
  \[\sqrt{-\log \left(1 - u1\right)} \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \]
2. Taylor expanded in u2 around 0
  \[\leadsto \sqrt{\mathsf{neg}\left(\log \left(1 - u1\right)\right)} \]
3. Step-by-step derivation
4. Applied rewrites49.1%
  \[\leadsto \sqrt{-\log \left(1 - u1\right)} \]
Recombined 2 regimes into one program.
Add Preprocessing

Alternative 16: 72.6% accurate, 5.1× speedup?

\[\sqrt{\mathsf{fma}\left(0.5, u1, 1\right) \cdot u1} \]

; Alternative 16: single-expression binary32 approximation sqrt(fma(0.5, u1, 1) * u1).
; Obtained by expanding the original in u2 around 0 (dropping the cosine factor) and then
; expanding sqrt(-log(1 - u1)) in u1 around 0. The body reads only u1;
; cosTheta_i and u2 appear only in the precondition.
(FPCore (cosTheta_i u1 u2)
  :precision binary32
  :pre (and (and (and (> cosTheta_i 0.9999) (<= cosTheta_i 1.0))
          (and (<= 2.328306437e-10 u1) (<= u1 1.0)))
     (and (<= 2.328306437e-10 u2) (<= u2 1.0)))
  (sqrt (* (fma 0.5 u1 1.0) u1)))

float code(float cosTheta_i, float u1, float u2) {
	return sqrtf((fmaf(0.5f, u1, 1.0f) * u1));
}

# Alternative 16: returns sqrt(fma(0.5, u1, 1) * u1), rounding the product to Float32.
# Only u1 is read; cosTheta_i and u2 are accepted but unused.
function code(cosTheta_i, u1, u2)
	scaled = Float32(fma(Float32(0.5), u1, Float32(1.0)) * u1)
	return sqrt(scaled)
end

\sqrt{\mathsf{fma}\left(0.5, u1, 1\right) \cdot u1}

Derivation

Initial program 57.2%
\[\sqrt{-\log \left(1 - u1\right)} \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \]
Taylor expanded in u2 around 0
\[\leadsto \sqrt{\mathsf{neg}\left(\log \left(1 - u1\right)\right)} \]
Step-by-step derivation
Applied rewrites49.1%
\[\leadsto \sqrt{-\log \left(1 - u1\right)} \]
Taylor expanded in u1 around 0
\[\leadsto \sqrt{u1 \cdot \left(1 + \frac{1}{2} \cdot u1\right)} \]
Step-by-step derivation
Applied rewrites72.6%
\[\leadsto \sqrt{u1 \cdot \left(1 + 0.5 \cdot u1\right)} \]
Step-by-step derivation
Applied rewrites72.6%
\[\leadsto \sqrt{\mathsf{fma}\left(0.5, u1, 1\right) \cdot u1} \]
Add Preprocessing

Alternative 17: 64.9% accurate, 18.5× speedup?

\[\sqrt{u1} \]

; Alternative 17: binary32 approximation sqrt(u1).
; Obtained by expanding the original in u2 around 0 (dropping the cosine factor) and then
; taking the leading term of sqrt(-log(1 - u1)) in u1 around 0. The body reads only u1;
; cosTheta_i and u2 appear only in the precondition.
(FPCore (cosTheta_i u1 u2)
  :precision binary32
  :pre (and (and (and (> cosTheta_i 0.9999) (<= cosTheta_i 1.0))
          (and (<= 2.328306437e-10 u1) (<= u1 1.0)))
     (and (<= 2.328306437e-10 u2) (<= u2 1.0)))
  (sqrt u1))

float code(float cosTheta_i, float u1, float u2) {
	return sqrtf(u1);
}

! Alternative 17: returns sqrt(u1) in single precision.
! Only u1 is read; costheta_i and u2 are accepted but unused.
! The previous "use fmin_fmax_functions" was removed: nothing from that module
! is referenced here (only the intrinsic sqrt), and its definition is not part
! of this listing, so the snippet now compiles on its own.
real(4) function code(costheta_i, u1, u2)
    implicit none
    real(4), intent (in) :: costheta_i
    real(4), intent (in) :: u1
    real(4), intent (in) :: u2
    code = sqrt(u1)
end function

# Alternative 17: returns sqrt(u1).
# Only u1 is read; cosTheta_i and u2 are accepted but unused.
function code(cosTheta_i, u1, u2)
	root = sqrt(u1)
	return root
end

function tmp = code(cosTheta_i, u1, u2)
	% Alternative 17: returns sqrt(u1).
	% Only u1 is read; cosTheta_i and u2 are accepted but unused.
	root = sqrt(u1);
	tmp = root;
end

\sqrt{u1}

Derivation

Initial program 57.2%
\[\sqrt{-\log \left(1 - u1\right)} \cdot \cos \left(\left(2 \cdot \pi\right) \cdot u2\right) \]
Taylor expanded in u2 around 0
\[\leadsto \sqrt{\mathsf{neg}\left(\log \left(1 - u1\right)\right)} \]
Step-by-step derivation
Applied rewrites49.1%
\[\leadsto \sqrt{-\log \left(1 - u1\right)} \]
Taylor expanded in u1 around 0
\[\leadsto \sqrt{u1} \]
Step-by-step derivation
Applied rewrites64.9%
\[\leadsto \sqrt{u1} \]
Add Preprocessing

Specification

Local Percentage Accuracy vs ?

Accuracy vs Speed?

Initial Program: 57.2% accurate, 1.0× speedupMathFPCoreCJuliaMATLABTeX?

Alternative 1: 99.2% accurate, 0.9× speedupMathFPCoreCJuliaTeX?

Alternative 2: 99.1% accurate, 1.0× speedupMathFPCoreCJuliaTeX?

Alternative 3: 97.3% accurate, 0.9× speedupMathFPCoreCJuliaTeX?

if (-.f32 #s(literal 1 binary32) u1) < 0.997600019

if 0.997600019 < (-.f32 #s(literal 1 binary32) u1)

Alternative 4: 96.8% accurate, 0.6× speedupMathFPCoreCJuliaTeX?

if (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2)) < 0.998000026

if 0.998000026 < (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2))

Alternative 5: 94.7% accurate, 1.0× speedupMathFPCoreCJuliaTeX?

if (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2) < 0.180000007

if 0.180000007 < (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2)

Alternative 6: 94.6% accurate, 0.6× speedupMathFPCoreCJuliaTeX?

if (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2)) < 0.985000014

if 0.985000014 < (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2))

Alternative 7: 94.6% accurate, 1.0× speedupMathFPCoreCJuliaTeX?

if (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2) < 0.180000007

if 0.180000007 < (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2)

Alternative 8: 94.6% accurate, 1.1× speedupMathFPCoreCJuliaTeX?

if (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2) < 0.180000007

if 0.180000007 < (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2)

Alternative 9: 88.2% accurate, 2.1× speedupMathFPCoreCJuliaTeX?

Alternative 10: 86.8% accurate, 0.7× speedupMathFPCoreCJuliaTeX?

if (*.f32 (sqrt.f32 (neg.f32 (log.f32 (-.f32 #s(literal 1 binary32) u1)))) (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2))) < 0.0480000004

if 0.0480000004 < (*.f32 (sqrt.f32 (neg.f32 (log.f32 (-.f32 #s(literal 1 binary32) u1)))) (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2)))

Alternative 11: 86.8% accurate, 0.7× speedupMathFPCoreCJuliaTeX?

if (*.f32 (sqrt.f32 (neg.f32 (log.f32 (-.f32 #s(literal 1 binary32) u1)))) (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2))) < 0.0480000004

if 0.0480000004 < (*.f32 (sqrt.f32 (neg.f32 (log.f32 (-.f32 #s(literal 1 binary32) u1)))) (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2)))

Alternative 12: 85.8% accurate, 0.7× speedupMathFPCoreCJuliaTeX?

if (*.f32 (sqrt.f32 (neg.f32 (log.f32 (-.f32 #s(literal 1 binary32) u1)))) (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2))) < 0.0500000007

if 0.0500000007 < (*.f32 (sqrt.f32 (neg.f32 (log.f32 (-.f32 #s(literal 1 binary32) u1)))) (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2)))

Alternative 13: 83.4% accurate, 0.8× speedupMathFPCoreCJuliaTeX?

if (*.f32 (sqrt.f32 (neg.f32 (log.f32 (-.f32 #s(literal 1 binary32) u1)))) (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2))) < 0.00124999997

if 0.00124999997 < (*.f32 (sqrt.f32 (neg.f32 (log.f32 (-.f32 #s(literal 1 binary32) u1)))) (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2)))

Alternative 14: 79.6% accurate, 0.8× speedupMathFPCoreCJuliaTeX?

if (*.f32 (sqrt.f32 (neg.f32 (log.f32 (-.f32 #s(literal 1 binary32) u1)))) (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2))) < 0.00899999961

if 0.00899999961 < (*.f32 (sqrt.f32 (neg.f32 (log.f32 (-.f32 #s(literal 1 binary32) u1)))) (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2)))

Alternative 15: 78.8% accurate, 0.8× speedupMathFPCoreCJuliaTeX?

if (*.f32 (sqrt.f32 (neg.f32 (log.f32 (-.f32 #s(literal 1 binary32) u1)))) (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2))) < 0.0500000007

if 0.0500000007 < (*.f32 (sqrt.f32 (neg.f32 (log.f32 (-.f32 #s(literal 1 binary32) u1)))) (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2)))

Alternative 16: 72.6% accurate, 5.1× speedupMathFPCoreCJuliaTeX?

Alternative 17: 64.9% accurate, 18.5× speedupMathFPCoreCFortranJuliaMATLABTeX?

Reproduce

Initial Program: 57.2% accurate, 1.0× speedup?

Alternative 1: 99.2% accurate, 0.9× speedup?

Alternative 2: 99.1% accurate, 1.0× speedup?

Alternative 3: 97.3% accurate, 0.9× speedup?

`if (-.f32 #s(literal 1 binary32) u1) < 0.997600019`

`if 0.997600019 < (-.f32 #s(literal 1 binary32) u1)`

Alternative 4: 96.8% accurate, 0.6× speedup?

`if (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2)) < 0.998000026`

`if 0.998000026 < (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2))`

Alternative 5: 94.7% accurate, 1.0× speedup?

`if (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2) < 0.180000007`

`if 0.180000007 < (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2)`

Alternative 6: 94.6% accurate, 0.6× speedup?

`if (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2)) < 0.985000014`

`if 0.985000014 < (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2))`

Alternative 7: 94.6% accurate, 1.0× speedup?

`if (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2) < 0.180000007`

`if 0.180000007 < (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2)`

Alternative 8: 94.6% accurate, 1.1× speedup?

`if (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2) < 0.180000007`

`if 0.180000007 < (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2)`

Alternative 9: 88.2% accurate, 2.1× speedup?

Alternative 10: 86.8% accurate, 0.7× speedup?

`if (*.f32 (sqrt.f32 (neg.f32 (log.f32 (-.f32 #s(literal 1 binary32) u1)))) (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2))) < 0.0480000004`

`if 0.0480000004 < (*.f32 (sqrt.f32 (neg.f32 (log.f32 (-.f32 #s(literal 1 binary32) u1)))) (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2)))`

Alternative 11: 86.8% accurate, 0.7× speedup?

`if (*.f32 (sqrt.f32 (neg.f32 (log.f32 (-.f32 #s(literal 1 binary32) u1)))) (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2))) < 0.0480000004`

`if 0.0480000004 < (*.f32 (sqrt.f32 (neg.f32 (log.f32 (-.f32 #s(literal 1 binary32) u1)))) (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2)))`

Alternative 12: 85.8% accurate, 0.7× speedup?

`if (*.f32 (sqrt.f32 (neg.f32 (log.f32 (-.f32 #s(literal 1 binary32) u1)))) (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2))) < 0.0500000007`

`if 0.0500000007 < (*.f32 (sqrt.f32 (neg.f32 (log.f32 (-.f32 #s(literal 1 binary32) u1)))) (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2)))`

Alternative 13: 83.4% accurate, 0.8× speedup?

`if (*.f32 (sqrt.f32 (neg.f32 (log.f32 (-.f32 #s(literal 1 binary32) u1)))) (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2))) < 0.00124999997`

`if 0.00124999997 < (*.f32 (sqrt.f32 (neg.f32 (log.f32 (-.f32 #s(literal 1 binary32) u1)))) (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2)))`

Alternative 14: 79.6% accurate, 0.8× speedup?

`if (*.f32 (sqrt.f32 (neg.f32 (log.f32 (-.f32 #s(literal 1 binary32) u1)))) (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2))) < 0.00899999961`

`if 0.00899999961 < (*.f32 (sqrt.f32 (neg.f32 (log.f32 (-.f32 #s(literal 1 binary32) u1)))) (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2)))`

Alternative 15: 78.8% accurate, 0.8× speedup?

`if (*.f32 (sqrt.f32 (neg.f32 (log.f32 (-.f32 #s(literal 1 binary32) u1)))) (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2))) < 0.0500000007`

`if 0.0500000007 < (*.f32 (sqrt.f32 (neg.f32 (log.f32 (-.f32 #s(literal 1 binary32) u1)))) (cos.f32 (*.f32 (*.f32 #s(literal 2 binary32) (PI.f32)) u2)))`

Alternative 16: 72.6% accurate, 5.1× speedup?

Alternative 17: 64.9% accurate, 18.5× speedup?