Disney BSSRDF, sample scattering profile, lower

Percentage Accurate: 61.4% → 99.4%
Time: 3.8s
Alternatives: 6
Speedup: 2.8×

Specification

?
\[\left(0 \leq s \land s \leq 256\right) \land \left(2.328306437 \cdot 10^{-10} \leq u \land u \leq 0.25\right)\]
\[s \cdot \log \left(\frac{1}{1 - 4 \cdot u}\right) \]
; Specification: s * log(1 / (1 - 4*u)), evaluated in binary32.
(FPCore (s u)
  :precision binary32
  ; Precondition: 0 <= s <= 256 and 2.328306437e-10 <= u <= 0.25.
  :pre (and (and (<= 0.0 s) (<= s 256.0))
     (and (<= 2.328306437e-10 u) (<= u 0.25)))
  (* s (log (/ 1.0 (- 1.0 (* 4.0 u))))))
/* Specification in C: s * log(1 / (1 - 4u)), using float literals and logf. */
float code(float s, float u) {
	return s * logf((1.0f / (1.0f - (4.0f * u))));
}
! Specification in Fortran: s * log(1 / (1 - 4*u)) with real(4) arguments and result.
real(4) function code(s, u)
! NOTE(review): nothing in this body visibly references fmin_fmax_functions;
! confirm whether this `use` is required here.
use fmin_fmax_functions
    real(4), intent (in) :: s
    real(4), intent (in) :: u
    code = s * log((1.0e0 / (1.0e0 - (4.0e0 * u))))
end function
# Specification in Julia: s * log(1 / (1 - 4u)); each intermediate result is
# passed through Float32(...).
function code(s, u)
	return Float32(s * log(Float32(Float32(1.0) / Float32(Float32(1.0) - Float32(Float32(4.0) * u)))))
end
function tmp = code(s, u)
	% Specification in MATLAB: s * log(1 / (1 - 4u)); literals are built with single(...).
	tmp = s * log((single(1.0) / (single(1.0) - (single(4.0) * u))));
end
s \cdot \log \left(\frac{1}{1 - 4 \cdot u}\right)

Local Percentage Accuracy vs ?

The average percentage accuracy by input value. The horizontal axis shows the value of an input variable; the variable is chosen in the title. The vertical axis is accuracy; higher is better. Red represents the original program, while blue represents Herbie's suggestion. These can be toggled with the buttons below the plot. The line is an average, while dots represent individual samples.

Accuracy vs Speed?

Herbie found 6 alternatives:

Alternative | Accuracy | Speedup
The accuracy (vertical axis) and speed (horizontal axis) of each alternative. Up and to the right is better. The red square shows the initial program, and each blue circle shows an alternative. The line shows the best available speed-accuracy tradeoffs.

Initial Program: 61.4% accurate, 1.0× speedup

\[\left(0 \leq s \land s \leq 256\right) \land \left(2.328306437 \cdot 10^{-10} \leq u \land u \leq 0.25\right)\]
\[s \cdot \log \left(\frac{1}{1 - 4 \cdot u}\right) \]
; Initial program: s * log(1 / (1 - 4*u)), evaluated in binary32.
(FPCore (s u)
  :precision binary32
  ; Precondition: 0 <= s <= 256 and 2.328306437e-10 <= u <= 0.25.
  :pre (and (and (<= 0.0 s) (<= s 256.0))
     (and (<= 2.328306437e-10 u) (<= u 0.25)))
  (* s (log (/ 1.0 (- 1.0 (* 4.0 u))))))
/* Initial program in C: s * log(1 / (1 - 4u)), using float literals and logf. */
float code(float s, float u) {
	return s * logf((1.0f / (1.0f - (4.0f * u))));
}
! Initial program in Fortran: s * log(1 / (1 - 4*u)) with real(4) arguments and result.
real(4) function code(s, u)
! NOTE(review): nothing in this body visibly references fmin_fmax_functions;
! confirm whether this `use` is required here.
use fmin_fmax_functions
    real(4), intent (in) :: s
    real(4), intent (in) :: u
    code = s * log((1.0e0 / (1.0e0 - (4.0e0 * u))))
end function
# Initial program in Julia: s * log(1 / (1 - 4u)); each intermediate result is
# passed through Float32(...).
function code(s, u)
	return Float32(s * log(Float32(Float32(1.0) / Float32(Float32(1.0) - Float32(Float32(4.0) * u)))))
end
function tmp = code(s, u)
	% Initial program in MATLAB: s * log(1 / (1 - 4u)); literals are built with single(...).
	tmp = s * log((single(1.0) / (single(1.0) - (single(4.0) * u))));
end
s \cdot \log \left(\frac{1}{1 - 4 \cdot u}\right)

Alternative 1: 99.4% accurate, 1.1× speedup

\[\left(0 \leq s \land s \leq 256\right) \land \left(2.328306437 \cdot 10^{-10} \leq u \land u \leq 0.25\right)\]
\[s \cdot \left(-\mathsf{log1p}\left(-4 \cdot u\right)\right) \]
; Alternative 1: s * -(log1p(-4*u)), evaluated in binary32.
(FPCore (s u)
  :precision binary32
  ; Precondition: 0 <= s <= 256 and 2.328306437e-10 <= u <= 0.25.
  :pre (and (and (<= 0.0 s) (<= s 256.0))
     (and (<= 2.328306437e-10 u) (<= u 0.25)))
  (* s (- (log1p (* -4.0 u)))))
/* Alternative 1 in C: s times the negated log1pf of the product -4u. */
float code(float s, float u) {
	return s * -log1pf((-4.0f * u));
}
# Alternative 1 in Julia: s times the negated log1p of -4u; each intermediate
# result is passed through Float32(...).
function code(s, u)
	return Float32(s * Float32(-log1p(Float32(Float32(-4.0) * u))))
end
s \cdot \left(-\mathsf{log1p}\left(-4 \cdot u\right)\right)
Derivation
  1. Initial program 61.4%

    \[s \cdot \log \left(\frac{1}{1 - 4 \cdot u}\right) \]
  2. Step-by-step derivation
    1. Applied rewrites: 63.9%

      \[\leadsto s \cdot \left(-\log \left(\mathsf{fma}\left(-4, u, 1\right)\right)\right) \]
    2. Applied rewrites: 99.4%

      \[\leadsto s \cdot \left(-\mathsf{log1p}\left(-4 \cdot u\right)\right) \]
    3. Add Preprocessing

    Alternative 2: 98.4% accurate, 0.8× speedup

    \[\left(0 \leq s \land s \leq 256\right) \land \left(2.328306437 \cdot 10^{-10} \leq u \land u \leq 0.25\right)\]
    \[\begin{array}{l} \mathbf{if}\;1 - 4 \cdot u \leq 0.9819999933242798:\\ \;\;\;\;s \cdot \left(-\log \left(\mathsf{fma}\left(-4, u, 1\right)\right)\right)\\ \mathbf{else}:\\ \;\;\;\;\frac{s}{\mathsf{fma}\left(-0.3333333333333333, u, -0.5\right) + \frac{0.25}{u}}\\ \end{array} \]
    ; Alternative 2: two regimes selected by comparing 1 - 4*u with a threshold.
    (FPCore (s u)
      :precision binary32
      ; Precondition: 0 <= s <= 256 and 2.328306437e-10 <= u <= 0.25.
      :pre (and (and (<= 0.0 s) (<= s 256.0))
         (and (<= 2.328306437e-10 u) (<= u 0.25)))
      ; Then-branch: s * -(log(fma(-4, u, 1))). Else-branch: rational expression in u.
      (if (<= (- 1.0 (* 4.0 u)) 0.9819999933242798)
      (* s (- (log (fma -4.0 u 1.0))))
      (/ s (+ (fma -0.3333333333333333 u -0.5) (/ 0.25 u)))))
    /* Alternative 2 in C: picks one of two expressions based on the value of 1 - 4u. */
    float code(float s, float u) {
    	float tmp;
    	/* At or below the threshold: log-based form s * -(logf(fmaf(-4, u, 1))). */
    	if ((1.0f - (4.0f * u)) <= 0.9819999933242798f) {
    		tmp = s * -logf(fmaf(-4.0f, u, 1.0f));
    	} else {
    		/* Above the threshold: rational form s / (fmaf(-1/3, u, -1/2) + 0.25 / u). */
    		tmp = s / (fmaf(-0.3333333333333333f, u, -0.5f) + (0.25f / u));
    	}
    	return tmp;
    }
    
    # Alternative 2 in Julia: picks one of two expressions based on the value of 1 - 4u.
    function code(s, u)
    	tmp = Float32(0.0)
    	# At or below the threshold: log-based form using fma(-4, u, 1).
    	if (Float32(Float32(1.0) - Float32(Float32(4.0) * u)) <= Float32(0.9819999933242798))
    		tmp = Float32(s * Float32(-log(fma(Float32(-4.0), u, Float32(1.0)))));
    	else
    		# Above the threshold: rational form in u.
    		tmp = Float32(s / Float32(fma(Float32(-0.3333333333333333), u, Float32(-0.5)) + Float32(Float32(0.25) / u)));
    	end
    	return tmp
    end
    
    \begin{array}{l}
    \mathbf{if}\;1 - 4 \cdot u \leq 0.9819999933242798:\\
    \;\;\;\;s \cdot \left(-\log \left(\mathsf{fma}\left(-4, u, 1\right)\right)\right)\\
    
    \mathbf{else}:\\
    \;\;\;\;\frac{s}{\mathsf{fma}\left(-0.3333333333333333, u, -0.5\right) + \frac{0.25}{u}}\\
    
    
    \end{array}
    
    Derivation
    1. Split input into 2 regimes
    2. if (-.f32 #s(literal 1 binary32) (*.f32 #s(literal 4 binary32) u)) < 0.981999993

      1. Initial program 61.4%

        \[s \cdot \log \left(\frac{1}{1 - 4 \cdot u}\right) \]
      2. Step-by-step derivation
        1. Applied rewrites: 63.9%

          \[\leadsto s \cdot \left(-\log \left(\mathsf{fma}\left(-4, u, 1\right)\right)\right) \]

        if 0.981999993 < (-.f32 #s(literal 1 binary32) (*.f32 #s(literal 4 binary32) u))

        1. Initial program 61.4%

          \[s \cdot \log \left(\frac{1}{1 - 4 \cdot u}\right) \]
        2. Step-by-step derivation
          1. Applied rewrites: 62.1%

            \[\leadsto \frac{1}{{\left(\left(-s\right) \cdot \log \left(\mathsf{fma}\left(-4, u, 1\right)\right)\right)}^{-1}} \]
          2. Step-by-step derivation
            1. Applied rewrites: 62.1%

              \[\leadsto \frac{1}{\frac{\frac{-1}{\log \left(\mathsf{fma}\left(-4, u, 1\right)\right)}}{s}} \]
            2. Taylor expanded in u around 0

              \[\leadsto \frac{1}{\frac{\frac{\frac{1}{4} + u \cdot \left(\frac{-1}{3} \cdot u - \frac{1}{2}\right)}{u}}{s}} \]
            3. Step-by-step derivation
              1. Applied rewrites: 88.9%

                \[\leadsto \frac{1}{\frac{\frac{0.25 + u \cdot \left(-0.3333333333333333 \cdot u - 0.5\right)}{u}}{s}} \]
              2. Applied rewrites: 92.6%

                \[\leadsto \frac{s}{\mathsf{fma}\left(-0.3333333333333333, u, -0.5\right) + \frac{0.25}{u}} \]
            4. Recombined 2 regimes into one program.
            5. Add Preprocessing

            Alternative 3: 97.3% accurate, 0.8× speedup

            \[\left(0 \leq s \land s \leq 256\right) \land \left(2.328306437 \cdot 10^{-10} \leq u \land u \leq 0.25\right)\]
            \[\begin{array}{l} \mathbf{if}\;1 - 4 \cdot u \leq 0.9957000017166138:\\ \;\;\;\;s \cdot \left(-\log \left(\mathsf{fma}\left(-4, u, 1\right)\right)\right)\\ \mathbf{else}:\\ \;\;\;\;\frac{s}{\frac{0.25}{u} - 0.5}\\ \end{array} \]
            ; Alternative 3: two regimes selected by comparing 1 - 4*u with a threshold.
            (FPCore (s u)
              :precision binary32
              ; Precondition: 0 <= s <= 256 and 2.328306437e-10 <= u <= 0.25.
              :pre (and (and (<= 0.0 s) (<= s 256.0))
                 (and (<= 2.328306437e-10 u) (<= u 0.25)))
              ; Then-branch: s * -(log(fma(-4, u, 1))). Else-branch: s / (0.25/u - 0.5).
              (if (<= (- 1.0 (* 4.0 u)) 0.9957000017166138)
              (* s (- (log (fma -4.0 u 1.0))))
              (/ s (- (/ 0.25 u) 0.5))))
            /* Alternative 3 in C: picks one of two expressions based on the value of 1 - 4u. */
            float code(float s, float u) {
            	float tmp;
            	/* At or below the threshold: log-based form s * -(logf(fmaf(-4, u, 1))). */
            	if ((1.0f - (4.0f * u)) <= 0.9957000017166138f) {
            		tmp = s * -logf(fmaf(-4.0f, u, 1.0f));
            	} else {
            		/* Above the threshold: rational form s / (0.25 / u - 0.5). */
            		tmp = s / ((0.25f / u) - 0.5f);
            	}
            	return tmp;
            }
            
            # Alternative 3 in Julia: picks one of two expressions based on the value of 1 - 4u.
            function code(s, u)
            	tmp = Float32(0.0)
            	# At or below the threshold: log-based form using fma(-4, u, 1).
            	if (Float32(Float32(1.0) - Float32(Float32(4.0) * u)) <= Float32(0.9957000017166138))
            		tmp = Float32(s * Float32(-log(fma(Float32(-4.0), u, Float32(1.0)))));
            	else
            		# Above the threshold: rational form s / (0.25 / u - 0.5).
            		tmp = Float32(s / Float32(Float32(Float32(0.25) / u) - Float32(0.5)));
            	end
            	return tmp
            end
            
            \begin{array}{l}
            \mathbf{if}\;1 - 4 \cdot u \leq 0.9957000017166138:\\
            \;\;\;\;s \cdot \left(-\log \left(\mathsf{fma}\left(-4, u, 1\right)\right)\right)\\
            
            \mathbf{else}:\\
            \;\;\;\;\frac{s}{\frac{0.25}{u} - 0.5}\\
            
            
            \end{array}
            
            Derivation
            1. Split input into 2 regimes
            2. if (-.f32 #s(literal 1 binary32) (*.f32 #s(literal 4 binary32) u)) < 0.995700002

              1. Initial program 61.4%

                \[s \cdot \log \left(\frac{1}{1 - 4 \cdot u}\right) \]
              2. Step-by-step derivation
                1. Applied rewrites: 63.9%

                  \[\leadsto s \cdot \left(-\log \left(\mathsf{fma}\left(-4, u, 1\right)\right)\right) \]

                if 0.995700002 < (-.f32 #s(literal 1 binary32) (*.f32 #s(literal 4 binary32) u))

                1. Initial program 61.4%

                  \[s \cdot \log \left(\frac{1}{1 - 4 \cdot u}\right) \]
                2. Step-by-step derivation
                  1. Applied rewrites: 62.1%

                    \[\leadsto \frac{1}{{\left(\left(-s\right) \cdot \log \left(\mathsf{fma}\left(-4, u, 1\right)\right)\right)}^{-1}} \]
                  2. Step-by-step derivation
                    1. Applied rewrites: 62.1%

                      \[\leadsto \frac{1}{\frac{\frac{-1}{\log \left(\mathsf{fma}\left(-4, u, 1\right)\right)}}{s}} \]
                    2. Taylor expanded in u around 0

                      \[\leadsto \frac{1}{\frac{\frac{\frac{1}{4} + \frac{-1}{2} \cdot u}{u}}{s}} \]
                    3. Step-by-step derivation
                      1. Applied rewrites: 85.0%

                        \[\leadsto \frac{1}{\frac{\frac{0.25 + -0.5 \cdot u}{u}}{s}} \]
                      2. Applied rewrites: 88.7%

                        \[\leadsto \frac{s}{\frac{0.25}{u} - 0.5} \]
                    4. Recombined 2 regimes into one program.
                    5. Add Preprocessing

                    Alternative 4: 88.7% accurate, 1.7× speedup

                    \[\left(0 \leq s \land s \leq 256\right) \land \left(2.328306437 \cdot 10^{-10} \leq u \land u \leq 0.25\right)\]
                    \[\frac{s}{\frac{0.25}{u} - 0.5} \]
                    ; Alternative 4: s / (0.25/u - 0.5), evaluated in binary32.
                    (FPCore (s u)
                      :precision binary32
                      ; Precondition: 0 <= s <= 256 and 2.328306437e-10 <= u <= 0.25.
                      :pre (and (and (<= 0.0 s) (<= s 256.0))
                         (and (<= 2.328306437e-10 u) (<= u 0.25)))
                      (/ s (- (/ 0.25 u) 0.5)))
                    /* Alternative 4 in C: s / (0.25 / u - 0.5), using float literals. */
                    float code(float s, float u) {
                    	return s / ((0.25f / u) - 0.5f);
                    }
                    
                    ! Alternative 4 in Fortran: s / (0.25 / u - 0.5) with real(4) arguments and result.
                    real(4) function code(s, u)
                    ! NOTE(review): nothing in this body visibly references fmin_fmax_functions;
                    ! confirm whether this `use` is required here.
                    use fmin_fmax_functions
                        real(4), intent (in) :: s
                        real(4), intent (in) :: u
                        code = s / ((0.25e0 / u) - 0.5e0)
                    end function
                    
                    # Alternative 4 in Julia: s / (0.25 / u - 0.5); each intermediate result is
                    # passed through Float32(...).
                    function code(s, u)
                    	return Float32(s / Float32(Float32(Float32(0.25) / u) - Float32(0.5)))
                    end
                    
                    function tmp = code(s, u)
                    	% Alternative 4 in MATLAB: s / (0.25 / u - 0.5); literals are built with single(...).
                    	tmp = s / ((single(0.25) / u) - single(0.5));
                    end
                    
                    \frac{s}{\frac{0.25}{u} - 0.5}
                    
                    Derivation
                    1. Initial program 61.4%

                      \[s \cdot \log \left(\frac{1}{1 - 4 \cdot u}\right) \]
                    2. Step-by-step derivation
                      1. Applied rewrites: 62.1%

                        \[\leadsto \frac{1}{{\left(\left(-s\right) \cdot \log \left(\mathsf{fma}\left(-4, u, 1\right)\right)\right)}^{-1}} \]
                      2. Step-by-step derivation
                        1. Applied rewrites: 62.1%

                          \[\leadsto \frac{1}{\frac{\frac{-1}{\log \left(\mathsf{fma}\left(-4, u, 1\right)\right)}}{s}} \]
                        2. Taylor expanded in u around 0

                          \[\leadsto \frac{1}{\frac{\frac{\frac{1}{4} + \frac{-1}{2} \cdot u}{u}}{s}} \]
                        3. Step-by-step derivation
                          1. Applied rewrites: 85.0%

                            \[\leadsto \frac{1}{\frac{\frac{0.25 + -0.5 \cdot u}{u}}{s}} \]
                          2. Applied rewrites: 88.7%

                            \[\leadsto \frac{s}{\frac{0.25}{u} - 0.5} \]
                          3. Add Preprocessing

                          Alternative 5: 73.7% accurate, 2.8× speedup

                          \[\left(0 \leq s \land s \leq 256\right) \land \left(2.328306437 \cdot 10^{-10} \leq u \land u \leq 0.25\right)\]
                          \[s \cdot \left(u \cdot 4\right) \]
                          ; Alternative 5: s * (u * 4), evaluated in binary32.
                          (FPCore (s u)
                            :precision binary32
                            ; Precondition: 0 <= s <= 256 and 2.328306437e-10 <= u <= 0.25.
                            :pre (and (and (<= 0.0 s) (<= s 256.0))
                               (and (<= 2.328306437e-10 u) (<= u 0.25)))
                            (* s (* u 4.0)))
                          /* Alternative 5 in C: s * (u * 4); u is multiplied by 4 first, then by s. */
                          float code(float s, float u) {
                          	return s * (u * 4.0f);
                          }
                          
                          ! Alternative 5 in Fortran: s * (u * 4) with real(4) arguments and result.
                          real(4) function code(s, u)
                          ! NOTE(review): nothing in this body visibly references fmin_fmax_functions;
                          ! confirm whether this `use` is required here.
                          use fmin_fmax_functions
                              real(4), intent (in) :: s
                              real(4), intent (in) :: u
                              code = s * (u * 4.0e0)
                          end function
                          
                          # Alternative 5 in Julia: s * (u * 4); each intermediate result is
                          # passed through Float32(...).
                          function code(s, u)
                          	return Float32(s * Float32(u * Float32(4.0)))
                          end
                          
                          function tmp = code(s, u)
                          	% Alternative 5 in MATLAB: s * (u * 4); the literal 4 is built with single(...).
                          	tmp = s * (u * single(4.0));
                          end
                          
                          s \cdot \left(u \cdot 4\right)
                          
                          Derivation
                          1. Initial program 61.4%

                            \[s \cdot \log \left(\frac{1}{1 - 4 \cdot u}\right) \]
                          2. Taylor expanded in u around 0

                            \[\leadsto s \cdot \left(u \cdot \left(4 + 8 \cdot u\right)\right) \]
                          3. Step-by-step derivation
                            1. Applied rewrites: 86.6%

                              \[\leadsto s \cdot \left(u \cdot \left(4 + 8 \cdot u\right)\right) \]
                            2. Taylor expanded in u around 0

                              \[\leadsto s \cdot \left(u \cdot 4\right) \]
                            3. Step-by-step derivation
                              1. Applied rewrites: 73.7%

                                \[\leadsto s \cdot \left(u \cdot 4\right) \]
                              2. Add Preprocessing

                              Alternative 6: 73.5% accurate, 2.8× speedup

                              \[\left(0 \leq s \land s \leq 256\right) \land \left(2.328306437 \cdot 10^{-10} \leq u \land u \leq 0.25\right)\]
                              \[4 \cdot \left(s \cdot u\right) \]
                              ; Alternative 6: 4 * (s * u), evaluated in binary32.
                              (FPCore (s u)
                                :precision binary32
                                ; Precondition: 0 <= s <= 256 and 2.328306437e-10 <= u <= 0.25.
                                :pre (and (and (<= 0.0 s) (<= s 256.0))
                                   (and (<= 2.328306437e-10 u) (<= u 0.25)))
                                (* 4.0 (* s u)))
                              /* Alternative 6 in C: 4 * (s * u); s and u are multiplied first, then scaled by 4. */
                              float code(float s, float u) {
                              	return 4.0f * (s * u);
                              }
                              
                              ! Alternative 6 in Fortran: 4 * (s * u) with real(4) arguments and result.
                              real(4) function code(s, u)
                              ! NOTE(review): nothing in this body visibly references fmin_fmax_functions;
                              ! confirm whether this `use` is required here.
                              use fmin_fmax_functions
                                  real(4), intent (in) :: s
                                  real(4), intent (in) :: u
                                  code = 4.0e0 * (s * u)
                              end function
                              
                              # Alternative 6 in Julia: 4 * (s * u); each intermediate result is
                              # passed through Float32(...).
                              function code(s, u)
                              	return Float32(Float32(4.0) * Float32(s * u))
                              end
                              
                              function tmp = code(s, u)
                              	% Alternative 6 in MATLAB: 4 * (s * u); the literal 4 is built with single(...).
                              	tmp = single(4.0) * (s * u);
                              end
                              
                              4 \cdot \left(s \cdot u\right)
                              
                              Derivation
                              1. Initial program 61.4%

                                \[s \cdot \log \left(\frac{1}{1 - 4 \cdot u}\right) \]
                              2. Taylor expanded in u around 0

                                \[\leadsto 4 \cdot \left(s \cdot u\right) \]
                              3. Step-by-step derivation
                                1. Applied rewrites: 73.5%

                                  \[\leadsto 4 \cdot \left(s \cdot u\right) \]
                                2. Add Preprocessing

                                Reproduce

                                ?
                                herbie shell --seed 2026086 
                                ; Named benchmark input for reproduction: s * log(1 / (1 - 4*u)) in binary32.
                                (FPCore (s u)
                                  :name "Disney BSSRDF, sample scattering profile, lower"
                                  :precision binary32
                                  ; Precondition: 0 <= s <= 256 and 2.328306437e-10 <= u <= 0.25.
                                  :pre (and (and (<= 0.0 s) (<= s 256.0)) (and (<= 2.328306437e-10 u) (<= u 0.25)))
                                  (* s (log (/ 1.0 (- 1.0 (* 4.0 u))))))