Result for UniformSampleCone, z

Specification

\[\left(\left(2.328306437 \cdot 10^{-10} \leq ux \land ux \leq 1\right) \land \left(2.328306437 \cdot 10^{-10} \leq uy \land uy \leq 1\right)\right) \land \left(0 \leq maxCos \land maxCos \leq 1\right)\]

\[\left(1 - ux\right) + ux \cdot maxCos \]

; Specification expression: (1 - ux) + ux * maxCos, evaluated in binary32.
; uy is declared as an argument but is not referenced by the body.
(FPCore (ux uy maxCos)
  :precision binary32
  (+ (- 1.0 ux) (* ux maxCos)))

/*
 * Evaluates (1 - ux) + ux * maxCos in single precision.
 * uy is part of the signature but is never read.
 */
float code(float ux, float uy, float maxCos) {
	const float complement = 1.0f - ux;
	const float scaled = ux * maxCos;
	return complement + scaled;
}

!> Evaluate (1 - ux) + ux * maxcos in single precision (real(4)).
!! uy is accepted to keep the three-argument signature but is not read.
!! No module is imported: the expression needs only intrinsic arithmetic.
real(4) function code(ux, uy, maxcos)
    implicit none
    real(4), intent (in) :: ux
    real(4), intent (in) :: uy
    real(4), intent (in) :: maxcos
    code = (1.0e0 - ux) + (ux * maxcos)
end function code

# Evaluates (1 - ux) + ux * maxCos, rounding each intermediate and the
# final sum to Float32. uy is accepted but not used.
function code(ux, uy, maxCos)
	complement = Float32(1.0f0 - ux)
	scaled = Float32(ux * maxCos)
	return Float32(complement + scaled)
end

function tmp = code(ux, uy, maxCos)
	% CODE  Evaluate (1 - ux) + ux * maxCos, with the constant 1 as single.
	%   uy is accepted but not used.
	complement = single(1.0) - ux;
	scaled = ux * maxCos;
	tmp = complement + scaled;
end

\left(1 - ux\right) + ux \cdot maxCos

Initial Program: 99.9% accurate, 1.0× speedup?

\[\left(1 - ux\right) + ux \cdot maxCos \]

; Initial program: (1 - ux) + ux * maxCos, evaluated in binary32.
; uy is declared as an argument but is not referenced by the body.
(FPCore (ux uy maxCos)
  :precision binary32
  (+ (- 1.0 ux) (* ux maxCos)))

/*
 * Initial program: (1 - ux) + ux * maxCos in single precision.
 * uy is part of the signature but is never read.
 */
float code(float ux, float uy, float maxCos) {
	const float complement = 1.0f - ux;
	const float scaled = ux * maxCos;
	return complement + scaled;
}

!> Initial program: (1 - ux) + ux * maxcos in single precision (real(4)).
!! uy is accepted to keep the three-argument signature but is not read.
!! No module is imported: the expression needs only intrinsic arithmetic.
real(4) function code(ux, uy, maxcos)
    implicit none
    real(4), intent (in) :: ux
    real(4), intent (in) :: uy
    real(4), intent (in) :: maxcos
    code = (1.0e0 - ux) + (ux * maxcos)
end function code

# Initial program: (1 - ux) + ux * maxCos, rounding each intermediate and
# the final sum to Float32. uy is accepted but not used.
function code(ux, uy, maxCos)
	complement = Float32(1.0f0 - ux)
	scaled = Float32(ux * maxCos)
	return Float32(complement + scaled)
end

function tmp = code(ux, uy, maxCos)
	% CODE  Initial program: (1 - ux) + ux * maxCos, with 1 as single.
	%   uy is accepted but not used.
	complement = single(1.0) - ux;
	scaled = ux * maxCos;
	tmp = complement + scaled;
end

\left(1 - ux\right) + ux \cdot maxCos

Alternative 1: 99.9% accurate, 1.1× speedup?

\[\mathsf{fma}\left(maxCos - 1, ux, 1\right) \]

; Alternative 1: a single fused multiply-add, fma(maxCos - 1, ux, 1), in binary32.
; uy is declared as an argument but is not referenced by the body.
(FPCore (ux uy maxCos)
  :precision binary32
  (fma (- maxCos 1.0) ux 1.0))

float code(float ux, float uy, float maxCos) {
	return fmaf((maxCos - 1.0f), ux, 1.0f);
}

# Alternative 1: fma(maxCos - 1, ux, 1), with the factor (maxCos - 1)
# rounded to Float32 first. uy is accepted but not used.
function code(ux, uy, maxCos)
	slope = Float32(maxCos - 1.0f0)
	return fma(slope, ux, 1.0f0)
end

\mathsf{fma}\left(maxCos - 1, ux, 1\right)

Derivation

Initial program: 99.9%
\[\left(1 - ux\right) + ux \cdot maxCos \]
Step-by-step derivation
1. lift-+.f32: N/A
  \[\leadsto \color{blue}{\left(1 - ux\right) + ux \cdot maxCos} \]
2. +-commutative: N/A
  \[\leadsto \color{blue}{ux \cdot maxCos + \left(1 - ux\right)} \]
3. lift-*.f32: N/A
  \[\leadsto \color{blue}{ux \cdot maxCos} + \left(1 - ux\right) \]
4. lower-fma.f32: 99.9%
  \[\leadsto \color{blue}{\mathsf{fma}\left(ux, maxCos, 1 - ux\right)} \]
Applied rewrites: 99.9%
\[\leadsto \color{blue}{\mathsf{fma}\left(ux, maxCos, 1 - ux\right)} \]
Step-by-step derivation
1. lift-fma.f32: N/A
  \[\leadsto \color{blue}{ux \cdot maxCos + \left(1 - ux\right)} \]
2. lift-*.f32: N/A
  \[\leadsto \color{blue}{ux \cdot maxCos} + \left(1 - ux\right) \]
3. +-commutative: N/A
  \[\leadsto \color{blue}{\left(1 - ux\right) + ux \cdot maxCos} \]
4. lift--.f32: N/A
  \[\leadsto \color{blue}{\left(1 - ux\right)} + ux \cdot maxCos \]
5. associate-+l-: N/A
  \[\leadsto \color{blue}{1 - \left(ux - ux \cdot maxCos\right)} \]
6. lift-*.f32: N/A
  \[\leadsto 1 - \left(ux - \color{blue}{ux \cdot maxCos}\right) \]
7. *-commutative: N/A
  \[\leadsto 1 - \left(ux - \color{blue}{maxCos \cdot ux}\right) \]
8. fp-cancel-sub-sign-inv: N/A
  \[\leadsto 1 - \color{blue}{\left(ux + \left(\mathsf{neg}\left(maxCos\right)\right) \cdot ux\right)} \]
9. distribute-rgt1-in: N/A
  \[\leadsto 1 - \color{blue}{\left(\left(\mathsf{neg}\left(maxCos\right)\right) + 1\right) \cdot ux} \]
10. +-commutative: N/A
  \[\leadsto 1 - \color{blue}{\left(1 + \left(\mathsf{neg}\left(maxCos\right)\right)\right)} \cdot ux \]
11. sub-flip: N/A
  \[\leadsto 1 - \color{blue}{\left(1 - maxCos\right)} \cdot ux \]
12. sub-negate-rev: N/A
  \[\leadsto 1 - \color{blue}{\left(\mathsf{neg}\left(\left(maxCos - 1\right)\right)\right)} \cdot ux \]
13. lift--.f32: N/A
  \[\leadsto 1 - \left(\mathsf{neg}\left(\color{blue}{\left(maxCos - 1\right)}\right)\right) \cdot ux \]
14. fp-cancel-sign-sub-inv: N/A
  \[\leadsto \color{blue}{1 + \left(maxCos - 1\right) \cdot ux} \]
15. *-commutative: N/A
  \[\leadsto 1 + \color{blue}{ux \cdot \left(maxCos - 1\right)} \]
16. lift-*.f32: N/A
  \[\leadsto 1 + \color{blue}{ux \cdot \left(maxCos - 1\right)} \]
17. +-commutative: N/A
  \[\leadsto \color{blue}{ux \cdot \left(maxCos - 1\right) + 1} \]
18. lift-*.f32: N/A
  \[\leadsto \color{blue}{ux \cdot \left(maxCos - 1\right)} + 1 \]
19. *-commutative: N/A
  \[\leadsto \color{blue}{\left(maxCos - 1\right) \cdot ux} + 1 \]
20. lower-fma.f32: 99.9%
  \[\leadsto \color{blue}{\mathsf{fma}\left(maxCos - 1, ux, 1\right)} \]
Applied rewrites: 99.9%
\[\leadsto \color{blue}{\mathsf{fma}\left(maxCos - 1, ux, 1\right)} \]
Add Preprocessing

Alternative 2: 99.9% accurate, 1.1× speedup?

\[\mathsf{fma}\left(ux, maxCos, 1 - ux\right) \]

; Alternative 2: fused multiply-add with the (1 - ux) term as the addend,
; fma(ux, maxCos, 1 - ux), in binary32. uy is declared but not referenced.
(FPCore (ux uy maxCos)
  :precision binary32
  (fma ux maxCos (- 1.0 ux)))

float code(float ux, float uy, float maxCos) {
	return fmaf(ux, maxCos, (1.0f - ux));
}

# Alternative 2: fma(ux, maxCos, 1 - ux), with the addend (1 - ux)
# rounded to Float32 first. uy is accepted but not used.
function code(ux, uy, maxCos)
	complement = Float32(1.0f0 - ux)
	return fma(ux, maxCos, complement)
end

\mathsf{fma}\left(ux, maxCos, 1 - ux\right)

Derivation

Initial program: 99.9%
\[\left(1 - ux\right) + ux \cdot maxCos \]
Step-by-step derivation
1. lift-+.f32: N/A
  \[\leadsto \color{blue}{\left(1 - ux\right) + ux \cdot maxCos} \]
2. +-commutative: N/A
  \[\leadsto \color{blue}{ux \cdot maxCos + \left(1 - ux\right)} \]
3. lift-*.f32: N/A
  \[\leadsto \color{blue}{ux \cdot maxCos} + \left(1 - ux\right) \]
4. lower-fma.f32: 99.9%
  \[\leadsto \color{blue}{\mathsf{fma}\left(ux, maxCos, 1 - ux\right)} \]
Applied rewrites: 99.9%
\[\leadsto \color{blue}{\mathsf{fma}\left(ux, maxCos, 1 - ux\right)} \]
Add Preprocessing

Alternative 3: 98.0% accurate, 2.6× speedup?

\[1 - ux \]

; Alternative 3: 1 - ux in binary32. The ux * maxCos term is omitted,
; so neither maxCos nor uy is referenced by the body.
(FPCore (ux uy maxCos)
  :precision binary32
  (- 1.0 ux))

/*
 * Alternative 3: 1 - ux in single precision.
 * uy and maxCos are part of the signature but are never read.
 */
float code(float ux, float uy, float maxCos) {
	const float complement = 1.0f - ux;
	return complement;
}

!> Alternative 3: evaluate 1 - ux in single precision (real(4)).
!! uy and maxcos are accepted to keep the three-argument signature but are
!! not read. No module is imported: only intrinsic arithmetic is needed.
real(4) function code(ux, uy, maxcos)
    implicit none
    real(4), intent (in) :: ux
    real(4), intent (in) :: uy
    real(4), intent (in) :: maxcos
    code = 1.0e0 - ux
end function code

# Alternative 3: 1 - ux, rounded to Float32.
# uy and maxCos are accepted but not used.
function code(ux, uy, maxCos)
	difference = 1.0f0 - ux
	return Float32(difference)
end

function tmp = code(ux, uy, maxCos)
	% CODE  Alternative 3: 1 - ux, with the constant 1 as single.
	%   uy and maxCos are accepted but not used.
	one = single(1.0);
	tmp = one - ux;
end

1 - ux

Derivation

Initial program: 99.9%
\[\left(1 - ux\right) + ux \cdot maxCos \]
Taylor expanded in maxCos around 0
\[\leadsto \color{blue}{1 - ux} \]
Step-by-step derivation
1. lower--.f32: 98.0%
  \[\leadsto 1 - \color{blue}{ux} \]
Applied rewrites: 98.0%
\[\leadsto \color{blue}{1 - ux} \]
Add Preprocessing

Alternative 4: 71.7% accurate, 9.2× speedup?

\[1 \]

; Alternative 4: the constant 1.0 in binary32.
; None of ux, uy, or maxCos is referenced by the body.
(FPCore (ux uy maxCos)
  :precision binary32
  1.0)

/*
 * Alternative 4: always returns 1.0f.
 * ux, uy and maxCos are part of the signature but are never read.
 */
float code(float ux, float uy, float maxCos) {
	const float unity = 1.0f;
	return unity;
}

!> Alternative 4: always return 1 in single precision (real(4)).
!! ux, uy and maxcos are accepted to keep the three-argument signature but
!! are not read. No module is imported: the body is a literal constant.
real(4) function code(ux, uy, maxcos)
    implicit none
    real(4), intent (in) :: ux
    real(4), intent (in) :: uy
    real(4), intent (in) :: maxcos
    code = 1.0e0
end function code

# Alternative 4: always returns the Float32 constant 1.
# ux, uy and maxCos are accepted but not used.
function code(ux, uy, maxCos)
	return 1.0f0
end

function tmp = code(ux, uy, maxCos)
	% CODE  Alternative 4: always returns the single-precision constant 1.
	%   ux, uy and maxCos are accepted but not used.
	tmp = single(1);
end

Derivation

Initial program: 99.9%
\[\left(1 - ux\right) + ux \cdot maxCos \]
Taylor expanded in maxCos around 0
\[\leadsto \color{blue}{1 - ux} \]
Step-by-step derivation
1. lower--.f32: 98.0%
  \[\leadsto 1 - \color{blue}{ux} \]
Applied rewrites: 98.0%
\[\leadsto \color{blue}{1 - ux} \]
Taylor expanded in ux around 0
\[\leadsto 1 \]
Step-by-step derivation
Applied rewrites: 71.7%
\[\leadsto 1 \]
Add Preprocessing

UniformSampleCone, z

Specification

Local Percentage Accuracy vs ?

Accuracy vs Speed?

Initial Program: 99.9% accurate, 1.0× speedup?

Alternative 1: 99.9% accurate, 1.1× speedup?

Alternative 2: 99.9% accurate, 1.1× speedup?

Alternative 3: 98.0% accurate, 2.6× speedup?

Alternative 4: 71.7% accurate, 9.2× speedup?

Reproduce

Specification

Local Percentage Accuracy vs ?

Accuracy vs Speed?

Initial Program: 99.9% accurate, 1.0× speedup? (formats: Math, FPCore, C, Fortran, Julia, MATLAB, TeX)

Alternative 1: 99.9% accurate, 1.1× speedup? (formats: Math, FPCore, C, Julia, TeX)

Alternative 2: 99.9% accurate, 1.1× speedup? (formats: Math, FPCore, C, Julia, TeX)

Alternative 3: 98.0% accurate, 2.6× speedup? (formats: Math, FPCore, C, Fortran, Julia, MATLAB, TeX)

Alternative 4: 71.7% accurate, 9.2× speedup? (formats: Math, FPCore, C, Fortran, Julia, MATLAB, TeX)

Reproduce

Initial Program: 99.9% accurate, 1.0× speedup?

Alternative 1: 99.9% accurate, 1.1× speedup?

Alternative 2: 99.9% accurate, 1.1× speedup?

Alternative 3: 98.0% accurate, 2.6× speedup?

Alternative 4: 71.7% accurate, 9.2× speedup?