Result for UniformSampleCone, z

Specification

\[\left(\left(2.328306437 \cdot 10^{-10} \leq ux \land ux \leq 1\right) \land \left(2.328306437 \cdot 10^{-10} \leq uy \land uy \leq 1\right)\right) \land \left(0 \leq maxCos \land maxCos \leq 1\right)\]

\[\left(1 - ux\right) + ux \cdot maxCos \]

; Specification: (1 - ux) + ux * maxCos, evaluated in binary32.
; Precondition: ux and uy lie in [2.328306437e-10, 1] (the lower bound is
; approximately 2^-32) and maxCos lies in [0, 1].
; uy is declared as an argument but does not appear in the body expression.
(FPCore (ux uy maxCos)
  :precision binary32
  :pre (and (and (and (<= 2.328306437e-10 ux) (<= ux 1.0))
          (and (<= 2.328306437e-10 uy) (<= uy 1.0)))
     (and (<= 0.0 maxCos) (<= maxCos 1.0)))
  (+ (- 1.0 ux) (* ux maxCos)))

/* Single-precision evaluation of (1 - ux) + ux * maxCos.
 * uy is part of the signature but is not read. */
float code(float ux, float uy, float maxCos) {
	return (1.0f - ux) + (ux * maxCos);
}

!> Evaluates (1 - ux) + ux * maxcos in real(4) arithmetic.
!! uy is part of the argument list but is not read.
!! No helper module is needed: the body uses only intrinsic arithmetic.
real(4) function code(ux, uy, maxcos)
    implicit none
    real(4), intent (in) :: ux
    real(4), intent (in) :: uy
    real(4), intent (in) :: maxcos
    ! The kind-suffixed literal keeps the subtraction in real(4) even when the
    ! compiler's default real kind differs from kind 4.
    code = (1.0_4 - ux) + (ux * maxcos)
end function code

# Evaluates (1 - ux) + ux * maxCos, converting each intermediate result to Float32.
# uy is accepted but not used.
function code(ux, uy, maxCos)
	return Float32(Float32(Float32(1.0) - ux) + Float32(ux * maxCos))
end

function tmp = code(ux, uy, maxCos)
	% Evaluates (1 - ux) + ux * maxCos with the constant 1 cast to single.
	% uy is accepted but not used.
	tmp = (single(1.0) - ux) + (ux * maxCos);
end

\left(1 - ux\right) + ux \cdot maxCos

Initial Program: 99.9% accurate, 1.0× speedup?

\[\left(1 - ux\right) + ux \cdot maxCos \]

; Initial program: (1 - ux) + ux * maxCos, evaluated in binary32.
; Precondition: ux and uy lie in [2.328306437e-10, 1] and maxCos lies in [0, 1].
; uy is declared as an argument but does not appear in the body expression.
(FPCore (ux uy maxCos)
  :precision binary32
  :pre (and (and (and (<= 2.328306437e-10 ux) (<= ux 1.0))
          (and (<= 2.328306437e-10 uy) (<= uy 1.0)))
     (and (<= 0.0 maxCos) (<= maxCos 1.0)))
  (+ (- 1.0 ux) (* ux maxCos)))

/* Initial program: single-precision evaluation of (1 - ux) + ux * maxCos.
 * uy is part of the signature but is not read. */
float code(float ux, float uy, float maxCos) {
	return (1.0f - ux) + (ux * maxCos);
}

!> Initial program: evaluates (1 - ux) + ux * maxcos in real(4) arithmetic.
!! uy is part of the argument list but is not read.
!! No helper module is needed: the body uses only intrinsic arithmetic.
real(4) function code(ux, uy, maxcos)
    implicit none
    real(4), intent (in) :: ux
    real(4), intent (in) :: uy
    real(4), intent (in) :: maxcos
    ! The kind-suffixed literal keeps the subtraction in real(4) even when the
    ! compiler's default real kind differs from kind 4.
    code = (1.0_4 - ux) + (ux * maxcos)
end function code

# Initial program: (1 - ux) + ux * maxCos, converting each intermediate result to Float32.
# uy is accepted but not used.
function code(ux, uy, maxCos)
	return Float32(Float32(Float32(1.0) - ux) + Float32(ux * maxCos))
end

function tmp = code(ux, uy, maxCos)
	% Initial program: (1 - ux) + ux * maxCos with the constant 1 cast to single.
	% uy is accepted but not used.
	tmp = (single(1.0) - ux) + (ux * maxCos);
end

\left(1 - ux\right) + ux \cdot maxCos

Alternative 1: 99.9% accurate, 1.0× speedup?

\[\left(maxCos \cdot ux - ux\right) - -1 \]

; Alternative 1: (maxCos * ux - ux) - (-1), evaluated in binary32.
; Algebraically equal to (1 - ux) + ux * maxCos; only the operation order differs.
; Precondition: ux and uy lie in [2.328306437e-10, 1] and maxCos lies in [0, 1].
; uy is declared as an argument but does not appear in the body expression.
(FPCore (ux uy maxCos)
  :precision binary32
  :pre (and (and (and (<= 2.328306437e-10 ux) (<= ux 1.0))
          (and (<= 2.328306437e-10 uy) (<= uy 1.0)))
     (and (<= 0.0 maxCos) (<= maxCos 1.0)))
  (- (- (* maxCos ux) ux) -1.0))

/* Alternative 1: single-precision evaluation of (maxCos * ux - ux) - (-1).
 * Subtracting the literal -1.0f adds 1 to the difference.
 * uy is part of the signature but is not read. */
float code(float ux, float uy, float maxCos) {
	return ((maxCos * ux) - ux) - -1.0f;
}

!> Alternative 1: evaluates (maxcos * ux - ux) - (-1) in real(4) arithmetic.
!! uy is part of the argument list but is not read.
!! No helper module is needed: the body uses only intrinsic arithmetic.
real(4) function code(ux, uy, maxcos)
    implicit none
    real(4), intent (in) :: ux
    real(4), intent (in) :: uy
    real(4), intent (in) :: maxcos
    ! The kind-suffixed literal keeps the final subtraction in real(4) even
    ! when the compiler's default real kind differs from kind 4.
    code = ((maxcos * ux) - ux) - (-1.0_4)
end function code

# Alternative 1: (maxCos * ux - ux) - (-1), converting each intermediate result to Float32.
# uy is accepted but not used.
function code(ux, uy, maxCos)
	return Float32(Float32(Float32(maxCos * ux) - ux) - Float32(-1.0))
end

function tmp = code(ux, uy, maxCos)
	% Alternative 1: (maxCos * ux - ux) - (-1) with the constant -1 cast to single.
	% uy is accepted but not used.
	tmp = ((maxCos * ux) - ux) - single(-1.0);
end

\left(maxCos \cdot ux - ux\right) - -1

Derivation

Initial program 99.9%
\[\left(1 - ux\right) + ux \cdot maxCos \]
Step-by-step derivation
1. lift-+.f32 (accuracy: N/A)
  \[\leadsto \color{blue}{\left(1 - ux\right) + ux \cdot maxCos} \]
2. lift--.f32 (accuracy: N/A)
  \[\leadsto \color{blue}{\left(1 - ux\right)} + ux \cdot maxCos \]
3. associate-+l- (accuracy: N/A)
  \[\leadsto \color{blue}{1 - \left(ux - ux \cdot maxCos\right)} \]
4. sub-negate-rev (accuracy: N/A)
  \[\leadsto 1 - \color{blue}{\left(\mathsf{neg}\left(\left(ux \cdot maxCos - ux\right)\right)\right)} \]
5. add-flip-rev (accuracy: N/A)
  \[\leadsto \color{blue}{1 + \left(ux \cdot maxCos - ux\right)} \]
6. +-commutative (accuracy: N/A)
  \[\leadsto \color{blue}{\left(ux \cdot maxCos - ux\right) + 1} \]
7. add-flip-rev (accuracy: N/A)
  \[\leadsto \color{blue}{\left(ux \cdot maxCos - ux\right) - \left(\mathsf{neg}\left(1\right)\right)} \]
8. lower--.f32 (accuracy: N/A)
  \[\leadsto \color{blue}{\left(ux \cdot maxCos - ux\right) - \left(\mathsf{neg}\left(1\right)\right)} \]
9. remove-double-neg (accuracy: N/A)
  \[\leadsto \left(\color{blue}{\left(\mathsf{neg}\left(\left(\mathsf{neg}\left(ux \cdot maxCos\right)\right)\right)\right)} - ux\right) - \left(\mathsf{neg}\left(1\right)\right) \]
10. lift-*.f32 (accuracy: N/A)
  \[\leadsto \left(\left(\mathsf{neg}\left(\left(\mathsf{neg}\left(\color{blue}{ux \cdot maxCos}\right)\right)\right)\right) - ux\right) - \left(\mathsf{neg}\left(1\right)\right) \]
11. distribute-lft-neg-out (accuracy: N/A)
  \[\leadsto \left(\left(\mathsf{neg}\left(\color{blue}{\left(\mathsf{neg}\left(ux\right)\right) \cdot maxCos}\right)\right) - ux\right) - \left(\mathsf{neg}\left(1\right)\right) \]
12. lower--.f32 (accuracy: N/A)
  \[\leadsto \color{blue}{\left(\left(\mathsf{neg}\left(\left(\mathsf{neg}\left(ux\right)\right) \cdot maxCos\right)\right) - ux\right)} - \left(\mathsf{neg}\left(1\right)\right) \]
13. distribute-lft-neg-out (accuracy: N/A)
  \[\leadsto \left(\left(\mathsf{neg}\left(\color{blue}{\left(\mathsf{neg}\left(ux \cdot maxCos\right)\right)}\right)\right) - ux\right) - \left(\mathsf{neg}\left(1\right)\right) \]
14. lift-*.f32 (accuracy: N/A)
  \[\leadsto \left(\left(\mathsf{neg}\left(\left(\mathsf{neg}\left(\color{blue}{ux \cdot maxCos}\right)\right)\right)\right) - ux\right) - \left(\mathsf{neg}\left(1\right)\right) \]
15. remove-double-neg (accuracy: N/A)
  \[\leadsto \left(\color{blue}{ux \cdot maxCos} - ux\right) - \left(\mathsf{neg}\left(1\right)\right) \]
16. lift-*.f32 (accuracy: N/A)
  \[\leadsto \left(\color{blue}{ux \cdot maxCos} - ux\right) - \left(\mathsf{neg}\left(1\right)\right) \]
17. *-commutative (accuracy: N/A)
  \[\leadsto \left(\color{blue}{maxCos \cdot ux} - ux\right) - \left(\mathsf{neg}\left(1\right)\right) \]
18. lower-*.f32 (accuracy: N/A)
  \[\leadsto \left(\color{blue}{maxCos \cdot ux} - ux\right) - \left(\mathsf{neg}\left(1\right)\right) \]
19. metadata-eval (accuracy: 99.9%)
  \[\leadsto \left(maxCos \cdot ux - ux\right) - \color{blue}{-1} \]
Applied rewrites (accuracy: 99.9%)
\[\leadsto \color{blue}{\left(maxCos \cdot ux - ux\right) - -1} \]
Add Preprocessing

Alternative 2: 99.9% accurate, 1.1× speedup?

\[\mathsf{fma}\left(ux, maxCos, 1 - ux\right) \]

; Alternative 2: fma(ux, maxCos, 1 - ux), evaluated in binary32.
; The product ux * maxCos and the addition of (1 - ux) are a single fma operation.
; Precondition: ux and uy lie in [2.328306437e-10, 1] and maxCos lies in [0, 1].
; uy is declared as an argument but does not appear in the body expression.
(FPCore (ux uy maxCos)
  :precision binary32
  :pre (and (and (and (<= 2.328306437e-10 ux) (<= ux 1.0))
          (and (<= 2.328306437e-10 uy) (<= uy 1.0)))
     (and (<= 0.0 maxCos) (<= maxCos 1.0)))
  (fma ux maxCos (- 1.0 ux)))

/* Alternative 2: fmaf(ux, maxCos, 1 - ux), i.e. ux * maxCos + (1 - ux)
 * with the multiply and add fused. fmaf is declared in <math.h>.
 * uy is part of the signature but is not read. */
float code(float ux, float uy, float maxCos) {
	return fmaf(ux, maxCos, (1.0f - ux));
}

# Alternative 2: fma(ux, maxCos, 1 - ux), with the (1 - ux) addend converted to Float32.
# uy is accepted but not used.
function code(ux, uy, maxCos)
	return fma(ux, maxCos, Float32(Float32(1.0) - ux))
end

\mathsf{fma}\left(ux, maxCos, 1 - ux\right)

Derivation

Initial program 99.9%
\[\left(1 - ux\right) + ux \cdot maxCos \]
Step-by-step derivation
1. lift-+.f32 (accuracy: N/A)
  \[\leadsto \color{blue}{\left(1 - ux\right) + ux \cdot maxCos} \]
2. +-commutative (accuracy: N/A)
  \[\leadsto \color{blue}{ux \cdot maxCos + \left(1 - ux\right)} \]
3. lift-*.f32 (accuracy: N/A)
  \[\leadsto \color{blue}{ux \cdot maxCos} + \left(1 - ux\right) \]
4. lower-fma.f32 (accuracy: 99.9%)
  \[\leadsto \color{blue}{\mathsf{fma}\left(ux, maxCos, 1 - ux\right)} \]
Applied rewrites (accuracy: 99.9%)
\[\leadsto \color{blue}{\mathsf{fma}\left(ux, maxCos, 1 - ux\right)} \]
Add Preprocessing

Alternative 3: 99.8% accurate, 1.1× speedup?

\[\mathsf{fma}\left(maxCos, ux, 1\right) - ux \]

; Alternative 3: fma(maxCos, ux, 1) - ux, evaluated in binary32.
; The fma computes maxCos * ux + 1; ux is then subtracted from that result.
; Precondition: ux and uy lie in [2.328306437e-10, 1] and maxCos lies in [0, 1].
; uy is declared as an argument but does not appear in the body expression.
(FPCore (ux uy maxCos)
  :precision binary32
  :pre (and (and (and (<= 2.328306437e-10 ux) (<= ux 1.0))
          (and (<= 2.328306437e-10 uy) (<= uy 1.0)))
     (and (<= 0.0 maxCos) (<= maxCos 1.0)))
  (- (fma maxCos ux 1.0) ux))

/* Alternative 3: fmaf(maxCos, ux, 1) - ux, i.e. a fused maxCos * ux + 1
 * followed by a separate subtraction of ux. fmaf is declared in <math.h>.
 * uy is part of the signature but is not read. */
float code(float ux, float uy, float maxCos) {
	return fmaf(maxCos, ux, 1.0f) - ux;
}

# Alternative 3: fma(maxCos, ux, 1) - ux, with the final difference converted to Float32.
# uy is accepted but not used.
function code(ux, uy, maxCos)
	return Float32(fma(maxCos, ux, Float32(1.0)) - ux)
end

\mathsf{fma}\left(maxCos, ux, 1\right) - ux

Derivation

Initial program 99.9%
\[\left(1 - ux\right) + ux \cdot maxCos \]
Step-by-step derivation
1. lift-+.f32 (accuracy: N/A)
  \[\leadsto \color{blue}{\left(1 - ux\right) + ux \cdot maxCos} \]
2. lift-*.f32 (accuracy: N/A)
  \[\leadsto \left(1 - ux\right) + \color{blue}{ux \cdot maxCos} \]
3. fp-cancel-sign-sub-inv (accuracy: N/A)
  \[\leadsto \color{blue}{\left(1 - ux\right) - \left(\mathsf{neg}\left(ux\right)\right) \cdot maxCos} \]
4. sub-flip (accuracy: N/A)
  \[\leadsto \color{blue}{\left(1 - ux\right) + \left(\mathsf{neg}\left(\left(\mathsf{neg}\left(ux\right)\right) \cdot maxCos\right)\right)} \]
5. +-commutative (accuracy: N/A)
  \[\leadsto \color{blue}{\left(\mathsf{neg}\left(\left(\mathsf{neg}\left(ux\right)\right) \cdot maxCos\right)\right) + \left(1 - ux\right)} \]
6. lift--.f32 (accuracy: N/A)
  \[\leadsto \left(\mathsf{neg}\left(\left(\mathsf{neg}\left(ux\right)\right) \cdot maxCos\right)\right) + \color{blue}{\left(1 - ux\right)} \]
7. associate--l+ (accuracy: N/A)
  \[\leadsto \color{blue}{\left(\left(\mathsf{neg}\left(\left(\mathsf{neg}\left(ux\right)\right) \cdot maxCos\right)\right) + 1\right) - ux} \]
8. lower--.f32 (accuracy: N/A)
  \[\leadsto \color{blue}{\left(\left(\mathsf{neg}\left(\left(\mathsf{neg}\left(ux\right)\right) \cdot maxCos\right)\right) + 1\right) - ux} \]
9. distribute-lft-neg-out (accuracy: N/A)
  \[\leadsto \left(\left(\mathsf{neg}\left(\color{blue}{\left(\mathsf{neg}\left(ux \cdot maxCos\right)\right)}\right)\right) + 1\right) - ux \]
10. lift-*.f32 (accuracy: N/A)
  \[\leadsto \left(\left(\mathsf{neg}\left(\left(\mathsf{neg}\left(\color{blue}{ux \cdot maxCos}\right)\right)\right)\right) + 1\right) - ux \]
11. remove-double-neg (accuracy: N/A)
  \[\leadsto \left(\color{blue}{ux \cdot maxCos} + 1\right) - ux \]
12. lift-*.f32 (accuracy: N/A)
  \[\leadsto \left(\color{blue}{ux \cdot maxCos} + 1\right) - ux \]
13. *-commutative (accuracy: N/A)
  \[\leadsto \left(\color{blue}{maxCos \cdot ux} + 1\right) - ux \]
14. lower-fma.f32 (accuracy: 99.8%)
  \[\leadsto \color{blue}{\mathsf{fma}\left(maxCos, ux, 1\right)} - ux \]
Applied rewrites (accuracy: 99.8%)
\[\leadsto \color{blue}{\mathsf{fma}\left(maxCos, ux, 1\right) - ux} \]
Add Preprocessing

Alternative 4: 98.1% accurate, 2.6× speedup?

\[1 - ux \]

; Alternative 4: 1 - ux, evaluated in binary32.
; The ux * maxCos term of the initial program is dropped (Taylor expansion in
; maxCos around 0), so neither uy nor maxCos appears in the body expression.
; Precondition: ux and uy lie in [2.328306437e-10, 1] and maxCos lies in [0, 1].
(FPCore (ux uy maxCos)
  :precision binary32
  :pre (and (and (and (<= 2.328306437e-10 ux) (<= ux 1.0))
          (and (<= 2.328306437e-10 uy) (<= uy 1.0)))
     (and (<= 0.0 maxCos) (<= maxCos 1.0)))
  (- 1.0 ux))

/* Alternative 4: single-precision 1 - ux.
 * uy and maxCos are part of the signature but are not read. */
float code(float ux, float uy, float maxCos) {
	return 1.0f - ux;
}

!> Alternative 4: evaluates 1 - ux in real(4) arithmetic.
!! uy and maxcos are part of the argument list but are not read.
!! No helper module is needed: the body uses only intrinsic arithmetic.
real(4) function code(ux, uy, maxcos)
    implicit none
    real(4), intent (in) :: ux
    real(4), intent (in) :: uy
    real(4), intent (in) :: maxcos
    ! The kind-suffixed literal keeps the subtraction in real(4) even when the
    ! compiler's default real kind differs from kind 4.
    code = 1.0_4 - ux
end function code

# Alternative 4: 1 - ux, converted to Float32.
# uy and maxCos are accepted but not used.
function code(ux, uy, maxCos)
	return Float32(Float32(1.0) - ux)
end

function tmp = code(ux, uy, maxCos)
	% Alternative 4: 1 - ux with the constant 1 cast to single.
	% uy and maxCos are accepted but not used.
	tmp = single(1.0) - ux;
end

1 - ux

Derivation

Initial program 99.9%
\[\left(1 - ux\right) + ux \cdot maxCos \]
Taylor expanded in maxCos around 0
\[\leadsto \color{blue}{1 - ux} \]
Step-by-step derivation
1. lower--.f32 (accuracy: 98.1%)
  \[\leadsto 1 - \color{blue}{ux} \]
Applied rewrites (accuracy: 98.1%)
\[\leadsto \color{blue}{1 - ux} \]
Add Preprocessing

Alternative 5: 71.4% accurate, 9.2× speedup?

\[1 \]

; Alternative 5: the constant 1.
; None of ux, uy, or maxCos appears in the body expression.
; Precondition: ux and uy lie in [2.328306437e-10, 1] and maxCos lies in [0, 1].
(FPCore (ux uy maxCos)
  :precision binary32
  :pre (and (and (and (<= 2.328306437e-10 ux) (<= ux 1.0))
          (and (<= 2.328306437e-10 uy) (<= uy 1.0)))
     (and (<= 0.0 maxCos) (<= maxCos 1.0)))
  1.0)

/* Alternative 5: always returns 1.0f.
 * ux, uy, and maxCos are part of the signature but are not read. */
float code(float ux, float uy, float maxCos) {
	return 1.0f;
}

!> Alternative 5: always returns 1 as a real(4) value.
!! ux, uy, and maxcos are part of the argument list but are not read.
!! No helper module is needed: the body is a single constant assignment.
real(4) function code(ux, uy, maxcos)
    implicit none
    real(4), intent (in) :: ux
    real(4), intent (in) :: uy
    real(4), intent (in) :: maxcos
    ! The kind-suffixed literal matches the declared real(4) result kind.
    code = 1.0_4
end function code

# Alternative 5: always returns Float32(1.0).
# ux, uy, and maxCos are accepted but not used.
function code(ux, uy, maxCos)
	return Float32(1.0)
end

function tmp = code(ux, uy, maxCos)
	% Alternative 5: always returns single(1.0).
	% ux, uy, and maxCos are accepted but not used.
	tmp = single(1.0);
end

Derivation

Initial program 99.9%
\[\left(1 - ux\right) + ux \cdot maxCos \]
Taylor expanded in maxCos around 0
\[\leadsto \color{blue}{1 - ux} \]
Step-by-step derivation
1. lower--.f32 (accuracy: 98.1%)
  \[\leadsto 1 - \color{blue}{ux} \]
Applied rewrites (accuracy: 98.1%)
\[\leadsto \color{blue}{1 - ux} \]
Taylor expanded in ux around 0
\[\leadsto 1 \]
Step-by-step derivation
Applied rewrites (accuracy: 71.4%)
\[\leadsto 1 \]
Add Preprocessing

UniformSampleCone, z

Specification

Local Percentage Accuracy vs ?

Accuracy vs Speed?

Initial Program: 99.9% accurate, 1.0× speedup?

Alternative 1: 99.9% accurate, 1.0× speedup?

Alternative 2: 99.9% accurate, 1.1× speedup?

Alternative 3: 99.8% accurate, 1.1× speedup?

Alternative 4: 98.1% accurate, 2.6× speedup?

Alternative 5: 71.4% accurate, 9.2× speedup?

Reproduce

Specification

Local Percentage Accuracy vs ?

Accuracy vs Speed?

Initial Program: 99.9% accurate, 1.0× speedup? Formats: Math, FPCore, C, Fortran, Julia, MATLAB, TeX

Alternative 1: 99.9% accurate, 1.0× speedup? Formats: Math, FPCore, C, Fortran, Julia, MATLAB, TeX

Alternative 2: 99.9% accurate, 1.1× speedup? Formats: Math, FPCore, C, Julia, TeX

Alternative 3: 99.8% accurate, 1.1× speedup? Formats: Math, FPCore, C, Julia, TeX

Alternative 4: 98.1% accurate, 2.6× speedup? Formats: Math, FPCore, C, Fortran, Julia, MATLAB, TeX

Alternative 5: 71.4% accurate, 9.2× speedup? Formats: Math, FPCore, C, Fortran, Julia, MATLAB, TeX

Reproduce

Initial Program: 99.9% accurate, 1.0× speedup?

Alternative 1: 99.9% accurate, 1.0× speedup?

Alternative 2: 99.9% accurate, 1.1× speedup?

Alternative 3: 99.8% accurate, 1.1× speedup?

Alternative 4: 98.1% accurate, 2.6× speedup?

Alternative 5: 71.4% accurate, 9.2× speedup?