
; binary32 expression over xi, yi, zi, ux, uy, maxCos:
;   t_0 = ((1 - ux) * maxCos) * ux
;   t_1 = sqrt(1 - t_0 * t_0)
;   t_2 = (uy * 2) * PI
;   result = (cos(t_2) * t_1) * xi + (sin(t_2) * t_1) * yi + t_0 * zi
(FPCore (xi yi zi ux uy maxCos)
:precision binary32
(let* ((t_0 (* (* (- 1.0 ux) maxCos) ux))
(t_1 (sqrt (- 1.0 (* t_0 t_0))))
(t_2 (* (* uy 2.0) (PI))))
(+ (+ (* (* (cos t_2) t_1) xi) (* (* (sin t_2) t_1) yi)) (* t_0 zi))))\begin{array}{l}
\\
\begin{array}{l}
t_0 := \left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\\
t_1 := \sqrt{1 - t\_0 \cdot t\_0}\\
t_2 := \left(uy \cdot 2\right) \cdot \mathsf{PI}\left(\right)\\
\left(\left(\cos t\_2 \cdot t\_1\right) \cdot xi + \left(\sin t\_2 \cdot t\_1\right) \cdot yi\right) + t\_0 \cdot zi
\end{array}
\end{array}
Sampling outcomes in binary32 precision:
Herbie found 11 alternatives:
| Alternative | Accuracy | Speedup |
|---|---|---|
; First listed program, in binary32:
;   t_0 = ((1 - ux) * maxCos) * ux, t_1 = sqrt(1 - t_0 * t_0), t_2 = (uy * 2) * PI
;   result = (cos(t_2) * t_1) * xi + (sin(t_2) * t_1) * yi + t_0 * zi
(FPCore (xi yi zi ux uy maxCos)
:precision binary32
(let* ((t_0 (* (* (- 1.0 ux) maxCos) ux))
(t_1 (sqrt (- 1.0 (* t_0 t_0))))
(t_2 (* (* uy 2.0) (PI))))
(+ (+ (* (* (cos t_2) t_1) xi) (* (* (sin t_2) t_1) yi)) (* t_0 zi))))\begin{array}{l}
\\
\begin{array}{l}
t_0 := \left(\left(1 - ux\right) \cdot maxCos\right) \cdot ux\\
t_1 := \sqrt{1 - t\_0 \cdot t\_0}\\
t_2 := \left(uy \cdot 2\right) \cdot \mathsf{PI}\left(\right)\\
\left(\left(\cos t\_2 \cdot t\_1\right) \cdot xi + \left(\sin t\_2 \cdot t\_1\right) \cdot yi\right) + t\_0 \cdot zi
\end{array}
\end{array}
; Keeps both trigonometric terms scaled by t_1 = sqrt(1 - t_0 * t_0), with
; commuted operands, and writes the zi contribution as a subtraction of
; (ux - 1) * (zi * (maxCos * ux)).
(FPCore (xi yi zi ux uy maxCos)
:precision binary32
(let* ((t_0 (* (* maxCos (- 1.0 ux)) ux))
(t_1 (sqrt (- 1.0 (* t_0 t_0))))
(t_2 (* (PI) (* 2.0 uy))))
(-
(+ (* yi (* (sin t_2) t_1)) (* xi (* t_1 (cos t_2))))
(* (- ux 1.0) (* zi (* maxCos ux))))))\begin{array}{l}
\\
\begin{array}{l}
t_0 := \left(maxCos \cdot \left(1 - ux\right)\right) \cdot ux\\
t_1 := \sqrt{1 - t\_0 \cdot t\_0}\\
t_2 := \mathsf{PI}\left(\right) \cdot \left(2 \cdot uy\right)\\
\left(yi \cdot \left(\sin t\_2 \cdot t\_1\right) + xi \cdot \left(t\_1 \cdot \cos t\_2\right)\right) - \left(ux - 1\right) \cdot \left(zi \cdot \left(maxCos \cdot ux\right)\right)
\end{array}
\end{array}
Initial program 99.0%
lift-*.f32N/A
lift-*.f32N/A
lift-*.f32N/A
associate-*l*N/A
associate-*l*N/A
lower-*.f32N/A
lower-*.f32N/A
lower-*.f32 99.0
Applied rewrites99.0%
Final simplification99.0%
; The xi term is cos((PI * uy) * 2) * xi with no square-root factor; the yi
; term still multiplies sin(PI * (2 * uy)) by sqrt(1 - t_0 * t_0). The zi
; contribution is subtracted as (ux - 1) * (zi * (maxCos * ux)).
(FPCore (xi yi zi ux uy maxCos)
:precision binary32
(let* ((t_0 (* (* maxCos (- 1.0 ux)) ux)))
(-
(+
(* (cos (* (* (PI) uy) 2.0)) xi)
(* yi (* (sin (* (PI) (* 2.0 uy))) (sqrt (- 1.0 (* t_0 t_0))))))
(* (- ux 1.0) (* zi (* maxCos ux))))))\begin{array}{l}
\\
\begin{array}{l}
t_0 := \left(maxCos \cdot \left(1 - ux\right)\right) \cdot ux\\
\left(\cos \left(\left(\mathsf{PI}\left(\right) \cdot uy\right) \cdot 2\right) \cdot xi + yi \cdot \left(\sin \left(\mathsf{PI}\left(\right) \cdot \left(2 \cdot uy\right)\right) \cdot \sqrt{1 - t\_0 \cdot t\_0}\right)\right) - \left(ux - 1\right) \cdot \left(zi \cdot \left(maxCos \cdot ux\right)\right)
\end{array}
\end{array}
Initial program 99.0%
lift-*.f32N/A
lift-*.f32N/A
lift-*.f32N/A
associate-*l*N/A
associate-*l*N/A
lower-*.f32N/A
lower-*.f32N/A
lower-*.f32 99.0
Applied rewrites99.0%
Taylor expanded in ux around 0
lower-cos.f32N/A
*-commutativeN/A
lower-*.f32N/A
*-commutativeN/A
lower-*.f32N/A
lower-PI.f32 99.0
Applied rewrites99.0%
Final simplification99.0%
; The xi term uses fma((uy * uy) * -2, PI * PI, 1), i.e. 1 - 2 * uy^2 * PI^2,
; where other listings use a cosine; it is still scaled by
; t_1 = sqrt(1 - t_0 * t_0). The yi term keeps sin(PI * (2 * uy)) * t_1.
(FPCore (xi yi zi ux uy maxCos)
:precision binary32
(let* ((t_0 (* (* maxCos (- 1.0 ux)) ux)) (t_1 (sqrt (- 1.0 (* t_0 t_0)))))
(-
(+
(* (* (fma (* (* uy uy) -2.0) (* (PI) (PI)) 1.0) t_1) xi)
(* yi (* (sin (* (PI) (* 2.0 uy))) t_1)))
(* (- ux 1.0) (* zi (* maxCos ux))))))\begin{array}{l}
\\
\begin{array}{l}
t_0 := \left(maxCos \cdot \left(1 - ux\right)\right) \cdot ux\\
t_1 := \sqrt{1 - t\_0 \cdot t\_0}\\
\left(\left(\mathsf{fma}\left(\left(uy \cdot uy\right) \cdot -2, \mathsf{PI}\left(\right) \cdot \mathsf{PI}\left(\right), 1\right) \cdot t\_1\right) \cdot xi + yi \cdot \left(\sin \left(\mathsf{PI}\left(\right) \cdot \left(2 \cdot uy\right)\right) \cdot t\_1\right)\right) - \left(ux - 1\right) \cdot \left(zi \cdot \left(maxCos \cdot ux\right)\right)
\end{array}
\end{array}
Initial program 99.0%
lift-*.f32N/A
lift-*.f32N/A
lift-*.f32N/A
associate-*l*N/A
associate-*l*N/A
lower-*.f32N/A
lower-*.f32N/A
lower-*.f32 99.0
Applied rewrites99.0%
Taylor expanded in uy around 0
+-commutativeN/A
associate-*r*N/A
lower-fma.f32N/A
lower-*.f32N/A
unpow2N/A
lower-*.f32N/A
unpow2N/A
lower-*.f32N/A
lower-PI.f32N/A
lower-PI.f32 59.8
Applied rewrites60.5%
Final simplification58.8%
; The xi term is fma((uy * uy) * -2, PI * PI, 1) * xi with no square-root
; factor; the yi term multiplies sin(PI * (2 * uy)) by sqrt(1 - t_0 * t_0).
; The zi contribution is subtracted as (ux - 1) * (zi * (maxCos * ux)).
(FPCore (xi yi zi ux uy maxCos)
:precision binary32
(let* ((t_0 (* (* maxCos (- 1.0 ux)) ux)))
(-
(+
(* (fma (* (* uy uy) -2.0) (* (PI) (PI)) 1.0) xi)
(* yi (* (sin (* (PI) (* 2.0 uy))) (sqrt (- 1.0 (* t_0 t_0))))))
(* (- ux 1.0) (* zi (* maxCos ux))))))\begin{array}{l}
\\
\begin{array}{l}
t_0 := \left(maxCos \cdot \left(1 - ux\right)\right) \cdot ux\\
\left(\mathsf{fma}\left(\left(uy \cdot uy\right) \cdot -2, \mathsf{PI}\left(\right) \cdot \mathsf{PI}\left(\right), 1\right) \cdot xi + yi \cdot \left(\sin \left(\mathsf{PI}\left(\right) \cdot \left(2 \cdot uy\right)\right) \cdot \sqrt{1 - t\_0 \cdot t\_0}\right)\right) - \left(ux - 1\right) \cdot \left(zi \cdot \left(maxCos \cdot ux\right)\right)
\end{array}
\end{array}
Initial program 99.0%
lift-*.f32N/A
lift-*.f32N/A
lift-*.f32N/A
associate-*l*N/A
associate-*l*N/A
lower-*.f32N/A
lower-*.f32N/A
lower-*.f32 99.0
Applied rewrites99.0%
Taylor expanded in ux around 0
lower-cos.f32N/A
*-commutativeN/A
lower-*.f32N/A
*-commutativeN/A
lower-*.f32N/A
lower-PI.f32 99.0
Applied rewrites99.0%
Taylor expanded in uy around 0
Applied rewrites89.5%
Final simplification89.9%
; With t_1 = (PI * uy) * 2, the yi term multiplies t_1 itself (not its sine)
; by sqrt(1 - t_0 * t_0); the xi term is cos(t_1) * xi with no square-root
; factor. The zi contribution is subtracted as (ux - 1) * (zi * (maxCos * ux)).
(FPCore (xi yi zi ux uy maxCos)
:precision binary32
(let* ((t_0 (* (* maxCos (- 1.0 ux)) ux)) (t_1 (* (* (PI) uy) 2.0)))
(-
(+ (* (* t_1 (sqrt (- 1.0 (* t_0 t_0)))) yi) (* (cos t_1) xi))
(* (- ux 1.0) (* zi (* maxCos ux))))))\begin{array}{l}
\\
\begin{array}{l}
t_0 := \left(maxCos \cdot \left(1 - ux\right)\right) \cdot ux\\
t_1 := \left(\mathsf{PI}\left(\right) \cdot uy\right) \cdot 2\\
\left(\left(t\_1 \cdot \sqrt{1 - t\_0 \cdot t\_0}\right) \cdot yi + \cos t\_1 \cdot xi\right) - \left(ux - 1\right) \cdot \left(zi \cdot \left(maxCos \cdot ux\right)\right)
\end{array}
\end{array}
Initial program 99.0%
lift-*.f32N/A
lift-*.f32N/A
lift-*.f32N/A
associate-*l*N/A
associate-*l*N/A
lower-*.f32N/A
lower-*.f32N/A
lower-*.f32 99.0
Applied rewrites99.0%
Taylor expanded in ux around 0
lower-cos.f32N/A
*-commutativeN/A
lower-*.f32N/A
*-commutativeN/A
lower-*.f32N/A
lower-PI.f32 99.0
Applied rewrites99.0%
Taylor expanded in uy around 0
*-commutativeN/A
lower-*.f32N/A
*-commutativeN/A
lower-*.f32N/A
lower-PI.f32 87.7
Applied rewrites87.7%
Final simplification87.7%
; sqrt(1 - maxCos^2 * (ux^2 * (1 - ux)^2)) * xi - (((ux - 1) * maxCos) * ux) * zi;
; yi and uy are declared but do not appear in the body.
(FPCore (xi yi zi ux uy maxCos) :precision binary32 (- (* (sqrt (- 1.0 (* (* maxCos maxCos) (* (* ux ux) (pow (- 1.0 ux) 2.0))))) xi) (* (* (* (- ux 1.0) maxCos) ux) zi)))
float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
return (sqrtf((1.0f - ((maxCos * maxCos) * ((ux * ux) * powf((1.0f - ux), 2.0f))))) * xi) - ((((ux - 1.0f) * maxCos) * ux) * zi);
}
! Single-precision evaluation of
!   sqrt(1 - maxcos**2 * (ux**2 * (1 - ux)**2)) * xi - (((ux - 1) * maxcos) * ux) * zi.
! yi and uy are dummy arguments that the body does not read.
real(4) function code(xi, yi, zi, ux, uy, maxcos)
    real(4), intent (in) :: xi, yi, zi
    real(4), intent (in) :: ux, uy, maxcos
    real(4) :: radicand, z_term
    radicand = 1.0e0 - ((maxcos * maxcos) * ((ux * ux) * ((1.0e0 - ux) ** 2.0e0)))
    z_term = (((ux - 1.0e0) * maxcos) * ux) * zi
    code = (sqrt(radicand) * xi) - z_term
end function code
# Float32 evaluation of
#   sqrt(1 - maxCos^2 * (ux^2 * (1 - ux)^2)) * xi - (((ux - 1) * maxCos) * ux) * zi.
# Every intermediate is rounded through Float32(...) at the same points as the
# single-expression form. yi and uy are accepted but not read.
function code(xi, yi, zi, ux, uy, maxCos)
    unit = Float32(1.0)
    complement = Float32(unit - ux)
    shrink = Float32(Float32(maxCos * maxCos) * Float32(Float32(ux * ux) * (complement ^ Float32(2.0))))
    radicand = Float32(unit - shrink)
    x_term = Float32(sqrt(radicand) * xi)
    z_term = Float32(Float32(Float32(Float32(ux - unit) * maxCos) * ux) * zi)
    return Float32(x_term - z_term)
end
function tmp = code(xi, yi, zi, ux, uy, maxCos)
    % Evaluates
    %   sqrt(1 - maxCos^2 * (ux^2 * (1 - ux)^2)) * xi - (((ux - 1) * maxCos) * ux) * zi
    % using single(...) constants. yi and uy are accepted but not read.
    unit = single(1.0);
    radicand = unit - ((maxCos * maxCos) * ((ux * ux) * ((unit - ux) ^ single(2.0))));
    z_term = (((ux - unit) * maxCos) * ux) * zi;
    tmp = (sqrt(radicand) * xi) - z_term;
end
\begin{array}{l}
\\
\sqrt{1 - \left(maxCos \cdot maxCos\right) \cdot \left(\left(ux \cdot ux\right) \cdot {\left(1 - ux\right)}^{2}\right)} \cdot xi - \left(\left(\left(ux - 1\right) \cdot maxCos\right) \cdot ux\right) \cdot zi
\end{array}
Initial program 99.0%
Taylor expanded in uy around 0
*-commutativeN/A
lower-*.f32N/A
lower-sqrt.f32N/A
lower--.f32N/A
*-commutativeN/A
lower-*.f32N/A
*-commutativeN/A
lower-*.f32N/A
lower-pow.f32N/A
lower--.f32N/A
unpow2N/A
lower-*.f32N/A
unpow2N/A
lower-*.f32 50.7
Applied rewrites50.7%
Final simplification50.7%
; sqrt(1 - ((maxCos * (1 - ux)) * ux)^2) * xi - (((ux - 1) * zi) * ux) * maxCos;
; yi and uy are declared but do not appear in the body.
(FPCore (xi yi zi ux uy maxCos) :precision binary32 (- (* (sqrt (- 1.0 (pow (* (* maxCos (- 1.0 ux)) ux) 2.0))) xi) (* (* (* (- ux 1.0) zi) ux) maxCos)))
float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
return (sqrtf((1.0f - powf(((maxCos * (1.0f - ux)) * ux), 2.0f))) * xi) - ((((ux - 1.0f) * zi) * ux) * maxCos);
}
! Single-precision evaluation of
!   sqrt(1 - ((maxcos * (1 - ux)) * ux)**2) * xi - (((ux - 1) * zi) * ux) * maxcos.
! yi and uy are dummy arguments that the body does not read.
real(4) function code(xi, yi, zi, ux, uy, maxcos)
    real(4), intent (in) :: xi, yi, zi
    real(4), intent (in) :: ux, uy, maxcos
    real(4) :: scaled, root
    scaled = (maxcos * (1.0e0 - ux)) * ux
    root = sqrt(1.0e0 - (scaled ** 2.0e0))
    code = (root * xi) - ((((ux - 1.0e0) * zi) * ux) * maxcos)
end function code
# Float32 evaluation of
#   sqrt(1 - ((maxCos * (1 - ux)) * ux)^2) * xi - (((ux - 1) * zi) * ux) * maxCos.
# Intermediates are rounded through Float32(...) at the same points as the
# single-expression form. yi and uy are accepted but not read.
function code(xi, yi, zi, ux, uy, maxCos)
    unit = Float32(1.0)
    scaled = Float32(Float32(maxCos * Float32(unit - ux)) * ux)
    radicand = Float32(unit - (scaled ^ Float32(2.0)))
    x_term = Float32(sqrt(radicand) * xi)
    z_term = Float32(Float32(Float32(Float32(ux - unit) * zi) * ux) * maxCos)
    return Float32(x_term - z_term)
end
function tmp = code(xi, yi, zi, ux, uy, maxCos)
    % Evaluates
    %   sqrt(1 - ((maxCos * (1 - ux)) * ux)^2) * xi - (((ux - 1) * zi) * ux) * maxCos
    % using single(...) constants. yi and uy are accepted but not read.
    unit = single(1.0);
    scaled = (maxCos * (unit - ux)) * ux;
    root = sqrt(unit - (scaled ^ single(2.0)));
    tmp = (root * xi) - ((((ux - unit) * zi) * ux) * maxCos);
end
\begin{array}{l}
\\
\sqrt{1 - {\left(\left(maxCos \cdot \left(1 - ux\right)\right) \cdot ux\right)}^{2}} \cdot xi - \left(\left(\left(ux - 1\right) \cdot zi\right) \cdot ux\right) \cdot maxCos
\end{array}
Initial program 99.0%
Taylor expanded in uy around 0
+-commutativeN/A
*-commutativeN/A
lower-fma.f32N/A
Applied rewrites13.6%
Applied rewrites50.6%
Final simplification50.6%
; (zi * ux) * maxCos + xi; yi and uy are declared but do not appear in the body.
(FPCore (xi yi zi ux uy maxCos) :precision binary32 (+ (* (* zi ux) maxCos) xi))
/*
 * Returns (zi * ux) * maxCos + xi in single precision.
 * yi and uy are part of the signature but are not read.
 */
float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
    const float z_scaled = (zi * ux) * maxCos;
    return z_scaled + xi;
}
! Returns (zi * ux) * maxcos + xi in single precision.
! yi and uy are dummy arguments that the body does not read.
real(4) function code(xi, yi, zi, ux, uy, maxcos)
    real(4), intent (in) :: xi, yi, zi
    real(4), intent (in) :: ux, uy, maxcos
    real(4) :: z_scaled
    z_scaled = (zi * ux) * maxcos
    code = z_scaled + xi
end function code
# Returns (zi * ux) * maxCos + xi, rounding each step through Float32(...).
# yi and uy are accepted but not read.
function code(xi, yi, zi, ux, uy, maxCos)
    z_scaled = Float32(Float32(zi * ux) * maxCos)
    return Float32(z_scaled + xi)
end
function tmp = code(xi, yi, zi, ux, uy, maxCos)
    % Returns (zi * ux) * maxCos + xi. yi and uy are accepted but not read.
    z_scaled = (zi * ux) * maxCos;
    tmp = z_scaled + xi;
end
\begin{array}{l}
\\
\left(zi \cdot ux\right) \cdot maxCos + xi
\end{array}
Initial program 99.0%
Taylor expanded in uy around 0
+-commutativeN/A
*-commutativeN/A
lower-fma.f32N/A
Applied rewrites13.6%
Taylor expanded in ux around 0
Applied rewrites44.8%
Applied rewrites48.2%
; fma(maxCos, zi * ux, xi); yi and uy are declared but do not appear in the body.
(FPCore (xi yi zi ux uy maxCos) :precision binary32 (fma maxCos (* zi ux) xi))
float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
return fmaf(maxCos, (zi * ux), xi);
}
# Returns fma(maxCos, Float32(zi * ux), xi): the product zi * ux is rounded
# through Float32 before the fused multiply-add. yi and uy are accepted but
# not read.
function code(xi, yi, zi, ux, uy, maxCos)
    z_by_ux = Float32(zi * ux)
    return fma(maxCos, z_by_ux, xi)
end
\begin{array}{l}
\\
\mathsf{fma}\left(maxCos, zi \cdot ux, xi\right)
\end{array}
Initial program 99.0%
Taylor expanded in uy around 0
+-commutativeN/A
*-commutativeN/A
lower-fma.f32N/A
Applied rewrites13.6%
Taylor expanded in ux around 0
Applied rewrites44.8%
Applied rewrites42.7%
Applied rewrites44.8%
; (zi * maxCos) * ux; xi, yi and uy are declared but do not appear in the body.
(FPCore (xi yi zi ux uy maxCos) :precision binary32 (* (* zi maxCos) ux))
/*
 * Returns (zi * maxCos) * ux in single precision.
 * xi, yi and uy are part of the signature but are not read.
 */
float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
    const float z_by_max_cos = zi * maxCos;
    return z_by_max_cos * ux;
}
! Returns (zi * maxcos) * ux in single precision.
! xi, yi and uy are dummy arguments that the body does not read.
real(4) function code(xi, yi, zi, ux, uy, maxcos)
    real(4), intent (in) :: xi, yi, zi
    real(4), intent (in) :: ux, uy, maxcos
    real(4) :: z_by_maxcos
    z_by_maxcos = zi * maxcos
    code = z_by_maxcos * ux
end function code
# Returns (zi * maxCos) * ux, rounding each product through Float32(...).
# xi, yi and uy are accepted but not read.
function code(xi, yi, zi, ux, uy, maxCos)
    z_by_max_cos = Float32(zi * maxCos)
    return Float32(z_by_max_cos * ux)
end
function tmp = code(xi, yi, zi, ux, uy, maxCos)
    % Returns (zi * maxCos) * ux. xi, yi and uy are accepted but not read.
    z_by_max_cos = zi * maxCos;
    tmp = z_by_max_cos * ux;
end
\begin{array}{l}
\\
\left(zi \cdot maxCos\right) \cdot ux
\end{array}
Initial program 99.0%
Taylor expanded in uy around 0
+-commutativeN/A
*-commutativeN/A
lower-fma.f32N/A
Applied rewrites13.6%
Taylor expanded in ux around 0
Applied rewrites44.8%
Taylor expanded in xi around 0
Applied rewrites11.6%
Applied rewrites11.6%
; zi * (maxCos * ux); xi, yi and uy are declared but do not appear in the body.
(FPCore (xi yi zi ux uy maxCos) :precision binary32 (* zi (* maxCos ux)))
/*
 * Returns zi * (maxCos * ux) in single precision.
 * xi, yi and uy are part of the signature but are not read.
 */
float code(float xi, float yi, float zi, float ux, float uy, float maxCos) {
    const float max_cos_by_ux = maxCos * ux;
    return zi * max_cos_by_ux;
}
! Returns zi * (maxcos * ux) in single precision.
! xi, yi and uy are dummy arguments that the body does not read.
real(4) function code(xi, yi, zi, ux, uy, maxcos)
    real(4), intent (in) :: xi, yi, zi
    real(4), intent (in) :: ux, uy, maxcos
    real(4) :: maxcos_by_ux
    maxcos_by_ux = maxcos * ux
    code = zi * maxcos_by_ux
end function code
# Returns zi * (maxCos * ux), rounding each product through Float32(...).
# xi, yi and uy are accepted but not read.
function code(xi, yi, zi, ux, uy, maxCos)
    max_cos_by_ux = Float32(maxCos * ux)
    return Float32(zi * max_cos_by_ux)
end
function tmp = code(xi, yi, zi, ux, uy, maxCos)
    % Returns zi * (maxCos * ux). xi, yi and uy are accepted but not read.
    max_cos_by_ux = maxCos * ux;
    tmp = zi * max_cos_by_ux;
end
\begin{array}{l}
\\
zi \cdot \left(maxCos \cdot ux\right)
\end{array}
Initial program 99.0%
Taylor expanded in uy around 0
+-commutativeN/A
*-commutativeN/A
lower-fma.f32N/A
Applied rewrites13.6%
Taylor expanded in ux around 0
Applied rewrites44.8%
Taylor expanded in xi around 0
Applied rewrites11.6%
Applied rewrites11.6%
Final simplification11.6%
herbie shell --seed 2024332
; Benchmark "UniformSampleCone 2", evaluated in binary32.
; Precondition: xi, yi, zi lie in [-10000, 10000]; ux and uy lie in
; [2.328306437e-10, 1]; maxCos lies in [0, 1].
; Body: with scaled = ((1 - ux) * maxCos) * ux, root = sqrt(1 - scaled * scaled)
; and angle = (uy * 2) * PI, the value is
;   (cos(angle) * root) * xi + (sin(angle) * root) * yi + scaled * zi.
(FPCore (xi yi zi ux uy maxCos)
  :name "UniformSampleCone 2"
  :precision binary32
  :pre (and (<= -10000.0 xi 10000.0)
            (<= -10000.0 yi 10000.0)
            (<= -10000.0 zi 10000.0)
            (<= 2.328306437e-10 ux 1.0)
            (<= 2.328306437e-10 uy 1.0)
            (<= 0.0 maxCos 1.0))
  (let* ((scaled (* (* (- 1.0 ux) maxCos) ux))
         (root (sqrt (- 1.0 (* scaled scaled))))
         (angle (* (* uy 2.0) (PI))))
    (+ (+ (* (* (cos angle) root) xi)
          (* (* (sin angle) root) yi))
       (* scaled zi))))