Result for Disney BSSRDF, sample scattering profile, lower

Specification

\[\left(0 \leq s \land s \leq 256\right) \land \left(2.328306437 \cdot 10^{-10} \leq u \land u \leq 0.25\right)\]

\[s \cdot \log \left(\frac{1}{1 - 4 \cdot u}\right) \]

; Specification: s * log(1 / (1 - 4*u)), evaluated in binary32.
(FPCore (s u)
  :precision binary32
  ; Precondition: 0 <= s <= 256 and 2.328306437e-10 <= u <= 0.25.
  :pre (and (and (<= 0.0 s) (<= s 256.0))
     (and (<= 2.328306437e-10 u) (<= u 0.25)))
  (* s (log (/ 1.0 (- 1.0 (* 4.0 u))))))

float code(float s, float u) {
	return s * logf((1.0f / (1.0f - (4.0f * u))));
}

real(4) function code(s, u)
use fmin_fmax_functions
    ! Evaluates s * log(1 / (1 - 4*u)) with real(4) arguments and result.
    ! NOTE(review): nothing from fmin_fmax_functions is referenced below;
    ! the module is defined outside this snippet.
    real(4), intent (in) :: s
    real(4), intent (in) :: u
    real(4) :: remaining
    remaining = 1.0e0 - (4.0e0 * u)
    code = s * log(1.0e0 / remaining)
end function

function code(s, u)
	# s * log(1 / (1 - 4u)), rounding every intermediate result to Float32.
	scaled = Float32(Float32(4.0) * u)
	remaining = Float32(Float32(1.0) - scaled)
	reciprocal = Float32(Float32(1.0) / remaining)
	return Float32(s * log(reciprocal))
end

function tmp = code(s, u)
	% Evaluates s * log(1 / (1 - 4*u)) using single-precision constants.
	remaining = single(1.0) - (single(4.0) * u);
	reciprocal = single(1.0) / remaining;
	tmp = s * log(reciprocal);
end

s \cdot \log \left(\frac{1}{1 - 4 \cdot u}\right)

Initial Program: 60.6% accurate, 1.0× speedup?

\[\left(0 \leq s \land s \leq 256\right) \land \left(2.328306437 \cdot 10^{-10} \leq u \land u \leq 0.25\right)\]

\[s \cdot \log \left(\frac{1}{1 - 4 \cdot u}\right) \]

; Initial program: the specification expression s * log(1 / (1 - 4*u)),
; evaluated directly in binary32.
(FPCore (s u)
  :precision binary32
  ; Precondition: 0 <= s <= 256 and 2.328306437e-10 <= u <= 0.25.
  :pre (and (and (<= 0.0 s) (<= s 256.0))
     (and (<= 2.328306437e-10 u) (<= u 0.25)))
  (* s (log (/ 1.0 (- 1.0 (* 4.0 u))))))

float code(float s, float u) {
	return s * logf((1.0f / (1.0f - (4.0f * u))));
}

real(4) function code(s, u)
use fmin_fmax_functions
    ! Initial program: s * log(1 / (1 - 4*u)) in real(4).
    ! NOTE(review): fmin_fmax_functions is defined outside this snippet and
    ! none of its names are referenced here.
    real(4), intent (in) :: s
    real(4), intent (in) :: u
    real(4) :: reciprocal
    reciprocal = 1.0e0 / (1.0e0 - (4.0e0 * u))
    code = s * log(reciprocal)
end function

function code(s, u)
	# Initial program: s * log(1 / (1 - 4u)) with a Float32 rounding after each step.
	one32 = Float32(1.0)
	four_u = Float32(Float32(4.0) * u)
	log_arg = Float32(one32 / Float32(one32 - four_u))
	return Float32(s * log(log_arg))
end

function tmp = code(s, u)
	% Initial program: s * log(1 / (1 - 4*u)) with single-precision constants.
	four_u = single(4.0) * u;
	log_arg = single(1.0) / (single(1.0) - four_u);
	tmp = s * log(log_arg);
end

s \cdot \log \left(\frac{1}{1 - 4 \cdot u}\right)

Alternative 1: 99.4% accurate, 1.1× speedup?

\[\left(0 \leq s \land s \leq 256\right) \land \left(2.328306437 \cdot 10^{-10} \leq u \land u \leq 0.25\right)\]

\[s \cdot \left(-\mathsf{log1p}\left(-4 \cdot u\right)\right) \]

; Alternative 1: s * (-log1p(-4*u)), i.e. the logarithm is taken via log1p
; of -4*u instead of log of 1 / (1 - 4*u).
(FPCore (s u)
  :precision binary32
  ; Precondition: 0 <= s <= 256 and 2.328306437e-10 <= u <= 0.25.
  :pre (and (and (<= 0.0 s) (<= s 256.0))
     (and (<= 2.328306437e-10 u) (<= u 0.25)))
  (* s (- (log1p (* -4.0 u)))))

float code(float s, float u) {
	return s * -log1pf((-4.0f * u));
}

function code(s, u)
	# Alternative 1: s * (-log1p(-4u)), rounded to Float32 after each step.
	x = Float32(Float32(-4.0) * u)
	neg_log = Float32(-log1p(x))
	return Float32(s * neg_log)
end

s \cdot \left(-\mathsf{log1p}\left(-4 \cdot u\right)\right)

Derivation

Initial program 60.6%
\[s \cdot \log \left(\frac{1}{1 - 4 \cdot u}\right) \]
Step-by-step derivation
Applied rewrites (63.2% accurate)
\[\leadsto s \cdot \left(-\log \left(\mathsf{fma}\left(-4, u, 1\right)\right)\right) \]
Step-by-step derivation
Applied rewrites (99.4% accurate)
\[\leadsto s \cdot \left(-\mathsf{log1p}\left(-4 \cdot u\right)\right) \]
Add Preprocessing

Alternative 2: 98.2% accurate, 0.8× speedup?

\[\left(0 \leq s \land s \leq 256\right) \land \left(2.328306437 \cdot 10^{-10} \leq u \land u \leq 0.25\right)\]

\[\begin{array}{l} \mathbf{if}\;4 \cdot u \leq 0.013000000268220901:\\ \;\;\;\;u \cdot \left(\mathsf{fma}\left(\mathsf{fma}\left(21.333333333333332, u, 8\right), u, 4\right) \cdot s\right)\\ \mathbf{else}:\\ \;\;\;\;s \cdot \left(-\log \left(\mathsf{fma}\left(-4, u, 1\right)\right)\right)\\ \end{array} \]

; Alternative 2: two regimes selected on 4*u.
;   4*u <= 0.013000000268220901 : u * (fma(fma(21.333333333333332, u, 8), u, 4) * s)
;   otherwise                   : s * (-log(fma(-4, u, 1)))
(FPCore (s u)
  :precision binary32
  ; Precondition: 0 <= s <= 256 and 2.328306437e-10 <= u <= 0.25.
  :pre (and (and (<= 0.0 s) (<= s 256.0))
     (and (<= 2.328306437e-10 u) (<= u 0.25)))
  (if (<= (* 4.0 u) 0.013000000268220901)
  (* u (* (fma (fma 21.333333333333332 u 8.0) u 4.0) s))
  (* s (- (log (fma -4.0 u 1.0))))))

float code(float s, float u) {
	float tmp;
	if ((4.0f * u) <= 0.013000000268220901f) {
		tmp = u * (fmaf(fmaf(21.333333333333332f, u, 8.0f), u, 4.0f) * s);
	} else {
		tmp = s * -logf(fmaf(-4.0f, u, 1.0f));
	}
	return tmp;
}

function code(s, u)
	# Alternative 2: polynomial in u when 4u is at or below the threshold,
	# otherwise s * (-log(fma(-4, u, 1))).
	if Float32(Float32(4.0) * u) <= Float32(0.013000000268220901)
		inner = fma(Float32(21.333333333333332), u, Float32(8.0))
		poly = fma(inner, u, Float32(4.0))
		return Float32(u * Float32(poly * s))
	end
	log_arg = fma(Float32(-4.0), u, Float32(1.0))
	return Float32(s * Float32(-log(log_arg)))
end

\begin{array}{l}
\mathbf{if}\;4 \cdot u \leq 0.013000000268220901:\\
\;\;\;\;u \cdot \left(\mathsf{fma}\left(\mathsf{fma}\left(21.333333333333332, u, 8\right), u, 4\right) \cdot s\right)\\

\mathbf{else}:\\
\;\;\;\;s \cdot \left(-\log \left(\mathsf{fma}\left(-4, u, 1\right)\right)\right)\\


\end{array}

Derivation

Split input into 2 regimes
if (*.f32 #s(literal 4 binary32) u) < 0.0130000003
1. Initial program 60.6%
  \[s \cdot \log \left(\frac{1}{1 - 4 \cdot u}\right) \]
2. Taylor expanded in u around 0
  \[\leadsto u \cdot \left(4 \cdot s + u \cdot \left(8 \cdot s + \frac{64}{3} \cdot \left(s \cdot u\right)\right)\right) \]
3. Step-by-step derivation
4. Applied rewrites (91.8% accurate)
  \[\leadsto u \cdot \mathsf{fma}\left(4, s, u \cdot \mathsf{fma}\left(8, s, 21.333333333333332 \cdot \left(s \cdot u\right)\right)\right) \]
5. Step-by-step derivation
6. Applied rewrites (91.7% accurate)
  \[\leadsto \mathsf{fma}\left(u \cdot u, \mathsf{fma}\left(8, s, \left(u \cdot s\right) \cdot 21.333333333333332\right), \left(4 \cdot s\right) \cdot u\right) \]
7. Step-by-step derivation
8. Applied rewrites (91.5% accurate)
  \[\leadsto u \cdot \left(\mathsf{fma}\left(\mathsf{fma}\left(21.333333333333332, u, 8\right), u, 4\right) \cdot s\right) \]
if 0.0130000003 < (*.f32 #s(literal 4 binary32) u)
1. Initial program 60.6%
  \[s \cdot \log \left(\frac{1}{1 - 4 \cdot u}\right) \]
2. Step-by-step derivation
3. Applied rewrites (63.2% accurate)
  \[\leadsto s \cdot \left(-\log \left(\mathsf{fma}\left(-4, u, 1\right)\right)\right) \]
Recombined 2 regimes into one program.
Add Preprocessing

Alternative 3: 97.1% accurate, 0.9× speedup?

\[\left(0 \leq s \land s \leq 256\right) \land \left(2.328306437 \cdot 10^{-10} \leq u \land u \leq 0.25\right)\]

\[\begin{array}{l} \mathbf{if}\;4 \cdot u \leq 0.003599999938160181:\\ \;\;\;\;u \cdot \mathsf{fma}\left(4, s, 8 \cdot \left(s \cdot u\right)\right)\\ \mathbf{else}:\\ \;\;\;\;s \cdot \left(-\log \left(\mathsf{fma}\left(-4, u, 1\right)\right)\right)\\ \end{array} \]

; Alternative 3: two regimes selected on 4*u.
;   4*u <= 0.003599999938160181 : u * fma(4, s, 8 * (s * u))
;   otherwise                   : s * (-log(fma(-4, u, 1)))
(FPCore (s u)
  :precision binary32
  ; Precondition: 0 <= s <= 256 and 2.328306437e-10 <= u <= 0.25.
  :pre (and (and (<= 0.0 s) (<= s 256.0))
     (and (<= 2.328306437e-10 u) (<= u 0.25)))
  (if (<= (* 4.0 u) 0.003599999938160181)
  (* u (fma 4.0 s (* 8.0 (* s u))))
  (* s (- (log (fma -4.0 u 1.0))))))

float code(float s, float u) {
	float tmp;
	if ((4.0f * u) <= 0.003599999938160181f) {
		tmp = u * fmaf(4.0f, s, (8.0f * (s * u)));
	} else {
		tmp = s * -logf(fmaf(-4.0f, u, 1.0f));
	}
	return tmp;
}

function code(s, u)
	# Alternative 3: u * fma(4, s, 8 * (s * u)) when 4u is at or below the
	# threshold, otherwise s * (-log(fma(-4, u, 1))).
	if Float32(Float32(4.0) * u) <= Float32(0.003599999938160181)
		quadratic = Float32(Float32(8.0) * Float32(s * u))
		return Float32(u * fma(Float32(4.0), s, quadratic))
	end
	log_arg = fma(Float32(-4.0), u, Float32(1.0))
	return Float32(s * Float32(-log(log_arg)))
end

\begin{array}{l}
\mathbf{if}\;4 \cdot u \leq 0.003599999938160181:\\
\;\;\;\;u \cdot \mathsf{fma}\left(4, s, 8 \cdot \left(s \cdot u\right)\right)\\

\mathbf{else}:\\
\;\;\;\;s \cdot \left(-\log \left(\mathsf{fma}\left(-4, u, 1\right)\right)\right)\\


\end{array}

Derivation

Split input into 2 regimes
if (*.f32 #s(literal 4 binary32) u) < 0.00359999994
1. Initial program 60.6%
  \[s \cdot \log \left(\frac{1}{1 - 4 \cdot u}\right) \]
2. Taylor expanded in u around 0
  \[\leadsto u \cdot \left(4 \cdot s + 8 \cdot \left(s \cdot u\right)\right) \]
3. Step-by-step derivation
4. Applied rewrites (87.6% accurate)
  \[\leadsto u \cdot \mathsf{fma}\left(4, s, 8 \cdot \left(s \cdot u\right)\right) \]
if 0.00359999994 < (*.f32 #s(literal 4 binary32) u)
1. Initial program 60.6%
  \[s \cdot \log \left(\frac{1}{1 - 4 \cdot u}\right) \]
2. Step-by-step derivation
3. Applied rewrites (63.2% accurate)
  \[\leadsto s \cdot \left(-\log \left(\mathsf{fma}\left(-4, u, 1\right)\right)\right) \]
Recombined 2 regimes into one program.
Add Preprocessing

Alternative 4: 87.6% accurate, 1.3× speedup?

\[\left(0 \leq s \land s \leq 256\right) \land \left(2.328306437 \cdot 10^{-10} \leq u \land u \leq 0.25\right)\]

\[u \cdot \mathsf{fma}\left(s, 8 \cdot u, 4 \cdot s\right) \]

; Alternative 4: u * fma(s, 8*u, 4*s).
(FPCore (s u)
  :precision binary32
  ; Precondition: 0 <= s <= 256 and 2.328306437e-10 <= u <= 0.25.
  :pre (and (and (<= 0.0 s) (<= s 256.0))
     (and (<= 2.328306437e-10 u) (<= u 0.25)))
  (* u (fma s (* 8.0 u) (* 4.0 s))))

float code(float s, float u) {
	return u * fmaf(s, (8.0f * u), (4.0f * s));
}

function code(s, u)
	# Alternative 4: u * fma(s, 8u, 4s), with Float32 rounding of each product.
	eight_u = Float32(Float32(8.0) * u)
	four_s = Float32(Float32(4.0) * s)
	return Float32(u * fma(s, eight_u, four_s))
end

u \cdot \mathsf{fma}\left(s, 8 \cdot u, 4 \cdot s\right)

Derivation

Initial program 60.6%
\[s \cdot \log \left(\frac{1}{1 - 4 \cdot u}\right) \]
Taylor expanded in u around 0
\[\leadsto u \cdot \left(4 \cdot s + 8 \cdot \left(s \cdot u\right)\right) \]
Step-by-step derivation
Applied rewrites (87.6% accurate)
\[\leadsto u \cdot \mathsf{fma}\left(4, s, 8 \cdot \left(s \cdot u\right)\right) \]
Step-by-step derivation
Applied rewrites (87.6% accurate)
\[\leadsto u \cdot \mathsf{fma}\left(s, 8 \cdot u, 4 \cdot s\right) \]
Add Preprocessing

Alternative 5: 87.6% accurate, 1.3× speedup?

\[\left(0 \leq s \land s \leq 256\right) \land \left(2.328306437 \cdot 10^{-10} \leq u \land u \leq 0.25\right)\]

\[u \cdot \mathsf{fma}\left(4, s, 8 \cdot \left(s \cdot u\right)\right) \]

; Alternative 5: u * fma(4, s, 8 * (s * u)).
(FPCore (s u)
  :precision binary32
  ; Precondition: 0 <= s <= 256 and 2.328306437e-10 <= u <= 0.25.
  :pre (and (and (<= 0.0 s) (<= s 256.0))
     (and (<= 2.328306437e-10 u) (<= u 0.25)))
  (* u (fma 4.0 s (* 8.0 (* s u)))))

float code(float s, float u) {
	return u * fmaf(4.0f, s, (8.0f * (s * u)));
}

function code(s, u)
	# Alternative 5: u * fma(4, s, 8 * (s * u)), with Float32 rounding of each product.
	su = Float32(s * u)
	quadratic = Float32(Float32(8.0) * su)
	return Float32(u * fma(Float32(4.0), s, quadratic))
end

u \cdot \mathsf{fma}\left(4, s, 8 \cdot \left(s \cdot u\right)\right)

Derivation

Initial program 60.6%
\[s \cdot \log \left(\frac{1}{1 - 4 \cdot u}\right) \]
Taylor expanded in u around 0
\[\leadsto u \cdot \left(4 \cdot s + 8 \cdot \left(s \cdot u\right)\right) \]
Step-by-step derivation
Applied rewrites (87.6% accurate)
\[\leadsto u \cdot \mathsf{fma}\left(4, s, 8 \cdot \left(s \cdot u\right)\right) \]
Add Preprocessing

Alternative 6: 87.4% accurate, 1.6× speedup?

\[\left(0 \leq s \land s \leq 256\right) \land \left(2.328306437 \cdot 10^{-10} \leq u \land u \leq 0.25\right)\]

\[u \cdot \left(\mathsf{fma}\left(8, u, 4\right) \cdot s\right) \]

; Alternative 6: u * (fma(8, u, 4) * s).
(FPCore (s u)
  :precision binary32
  ; Precondition: 0 <= s <= 256 and 2.328306437e-10 <= u <= 0.25.
  :pre (and (and (<= 0.0 s) (<= s 256.0))
     (and (<= 2.328306437e-10 u) (<= u 0.25)))
  (* u (* (fma 8.0 u 4.0) s)))

float code(float s, float u) {
	return u * (fmaf(8.0f, u, 4.0f) * s);
}

function code(s, u)
	# Alternative 6: u * (fma(8, u, 4) * s), rounding the product to Float32.
	linear = fma(Float32(8.0), u, Float32(4.0))
	scaled = Float32(linear * s)
	return Float32(u * scaled)
end

u \cdot \left(\mathsf{fma}\left(8, u, 4\right) \cdot s\right)

Derivation

Initial program 60.6%
\[s \cdot \log \left(\frac{1}{1 - 4 \cdot u}\right) \]
Taylor expanded in u around 0
\[\leadsto u \cdot \left(4 \cdot s + 8 \cdot \left(s \cdot u\right)\right) \]
Step-by-step derivation
Applied rewrites (87.6% accurate)
\[\leadsto u \cdot \mathsf{fma}\left(4, s, 8 \cdot \left(s \cdot u\right)\right) \]
Step-by-step derivation
Applied rewrites (87.6% accurate)
\[\leadsto \mathsf{fma}\left(8 \cdot s, u \cdot u, \left(4 \cdot s\right) \cdot u\right) \]
Step-by-step derivation
Applied rewrites (87.4% accurate)
\[\leadsto u \cdot \left(\mathsf{fma}\left(8, u, 4\right) \cdot s\right) \]
Add Preprocessing

Alternative 7: 74.7% accurate, 2.8× speedup?

\[\left(0 \leq s \land s \leq 256\right) \land \left(2.328306437 \cdot 10^{-10} \leq u \land u \leq 0.25\right)\]

\[s \cdot \left(u \cdot 4\right) \]

; Alternative 7: s * (u * 4).
(FPCore (s u)
  :precision binary32
  ; Precondition: 0 <= s <= 256 and 2.328306437e-10 <= u <= 0.25.
  :pre (and (and (<= 0.0 s) (<= s 256.0))
     (and (<= 2.328306437e-10 u) (<= u 0.25)))
  (* s (* u 4.0)))

/* Alternative 7: s * (u * 4). */
float code(float s, float u) {
	const float four_u = u * 4.0f;
	return s * four_u;
}

real(4) function code(s, u)
use fmin_fmax_functions
    ! Alternative 7: s * (u * 4) in real(4).
    ! NOTE(review): fmin_fmax_functions is defined outside this snippet and
    ! none of its names are referenced here.
    real(4), intent (in) :: s
    real(4), intent (in) :: u
    real(4) :: four_u
    four_u = u * 4.0e0
    code = s * four_u
end function

function code(s, u)
	# Alternative 7: s * (u * 4), rounding the inner product to Float32 first.
	four_u = Float32(u * Float32(4.0))
	return Float32(s * four_u)
end

function tmp = code(s, u)
	% Alternative 7: s * (u * 4) with a single-precision constant.
	four_u = u * single(4.0);
	tmp = s * four_u;
end

s \cdot \left(u \cdot 4\right)

Derivation

Initial program 60.6%
\[s \cdot \log \left(\frac{1}{1 - 4 \cdot u}\right) \]
Taylor expanded in u around 0
\[\leadsto s \cdot \left(u \cdot \left(4 + 8 \cdot u\right)\right) \]
Step-by-step derivation
Applied rewrites (87.4% accurate)
\[\leadsto s \cdot \left(u \cdot \left(4 + 8 \cdot u\right)\right) \]
Taylor expanded in u around 0
\[\leadsto s \cdot \left(u \cdot 4\right) \]
Step-by-step derivation
Applied rewrites (74.7% accurate)
\[\leadsto s \cdot \left(u \cdot 4\right) \]
Add Preprocessing

Alternative 8: 74.4% accurate, 2.8× speedup?

\[\left(0 \leq s \land s \leq 256\right) \land \left(2.328306437 \cdot 10^{-10} \leq u \land u \leq 0.25\right)\]

\[4 \cdot \left(s \cdot u\right) \]

; Alternative 8: 4 * (s * u).
(FPCore (s u)
  :precision binary32
  ; Precondition: 0 <= s <= 256 and 2.328306437e-10 <= u <= 0.25.
  :pre (and (and (<= 0.0 s) (<= s 256.0))
     (and (<= 2.328306437e-10 u) (<= u 0.25)))
  (* 4.0 (* s u)))

/* Alternative 8: 4 * (s * u). */
float code(float s, float u) {
	const float su = s * u;
	return 4.0f * su;
}

real(4) function code(s, u)
use fmin_fmax_functions
    ! Alternative 8: 4 * (s * u) in real(4).
    ! NOTE(review): fmin_fmax_functions is defined outside this snippet and
    ! none of its names are referenced here.
    real(4), intent (in) :: s
    real(4), intent (in) :: u
    real(4) :: su
    su = s * u
    code = 4.0e0 * su
end function

function code(s, u)
	# Alternative 8: 4 * (s * u), rounding the inner product to Float32 first.
	su = Float32(s * u)
	return Float32(Float32(4.0) * su)
end

function tmp = code(s, u)
	% Alternative 8: 4 * (s * u) with a single-precision constant.
	su = s * u;
	tmp = single(4.0) * su;
end

4 \cdot \left(s \cdot u\right)

Derivation

Initial program 60.6%
\[s \cdot \log \left(\frac{1}{1 - 4 \cdot u}\right) \]
Taylor expanded in u around 0
\[\leadsto 4 \cdot \left(s \cdot u\right) \]
Step-by-step derivation
Applied rewrites (74.4% accurate)
\[\leadsto 4 \cdot \left(s \cdot u\right) \]
Add Preprocessing

Disney BSSRDF, sample scattering profile, lower

Specification

Local Percentage Accuracy vs ?

Accuracy vs Speed?

Initial Program: 60.6% accurate, 1.0× speedup?

Alternative 1: 99.4% accurate, 1.1× speedup?

Alternative 2: 98.2% accurate, 0.8× speedup?

`if (*.f32 #s(literal 4 binary32) u) < 0.0130000003`

`if 0.0130000003 < (*.f32 #s(literal 4 binary32) u)`

Alternative 3: 97.1% accurate, 0.9× speedup?

`if (*.f32 #s(literal 4 binary32) u) < 0.00359999994`

`if 0.00359999994 < (*.f32 #s(literal 4 binary32) u)`

Alternative 4: 87.6% accurate, 1.3× speedup?

Alternative 5: 87.6% accurate, 1.3× speedup?

Alternative 6: 87.4% accurate, 1.6× speedup?

Alternative 7: 74.7% accurate, 2.8× speedup?

Alternative 8: 74.4% accurate, 2.8× speedup?

Reproduce

Specification

Local Percentage Accuracy vs ?

Accuracy vs Speed?

Initial Program: 60.6% accurate, 1.0× speedup? [Math | FPCore | C | Fortran | Julia | MATLAB | TeX]

Alternative 1: 99.4% accurate, 1.1× speedup? [Math | FPCore | C | Julia | TeX]

Alternative 2: 98.2% accurate, 0.8× speedup? [Math | FPCore | C | Julia | TeX]

if (*.f32 #s(literal 4 binary32) u) < 0.0130000003

if 0.0130000003 < (*.f32 #s(literal 4 binary32) u)

Alternative 3: 97.1% accurate, 0.9× speedup? [Math | FPCore | C | Julia | TeX]

if (*.f32 #s(literal 4 binary32) u) < 0.00359999994

if 0.00359999994 < (*.f32 #s(literal 4 binary32) u)

Alternative 4: 87.6% accurate, 1.3× speedup? [Math | FPCore | C | Julia | TeX]

Alternative 5: 87.6% accurate, 1.3× speedup? [Math | FPCore | C | Julia | TeX]

Alternative 6: 87.4% accurate, 1.6× speedup? [Math | FPCore | C | Julia | TeX]

Alternative 7: 74.7% accurate, 2.8× speedup? [Math | FPCore | C | Fortran | Julia | MATLAB | TeX]

Alternative 8: 74.4% accurate, 2.8× speedup? [Math | FPCore | C | Fortran | Julia | MATLAB | TeX]

Reproduce

Initial Program: 60.6% accurate, 1.0× speedup?

Alternative 1: 99.4% accurate, 1.1× speedup?

Alternative 2: 98.2% accurate, 0.8× speedup?

`if (*.f32 #s(literal 4 binary32) u) < 0.0130000003`

`if 0.0130000003 < (*.f32 #s(literal 4 binary32) u)`

Alternative 3: 97.1% accurate, 0.9× speedup?

`if (*.f32 #s(literal 4 binary32) u) < 0.00359999994`

`if 0.00359999994 < (*.f32 #s(literal 4 binary32) u)`

Alternative 4: 87.6% accurate, 1.3× speedup?

Alternative 5: 87.6% accurate, 1.3× speedup?

Alternative 6: 87.4% accurate, 1.6× speedup?

Alternative 7: 74.7% accurate, 2.8× speedup?

Alternative 8: 74.4% accurate, 2.8× speedup?