Minor optimizations

2025-12-22 22:22:02 +00:00 · 2023-11-13 17:54:27 +00:00
parent d049162c18
commit ea204b562e
2 changed files with 34 additions and 11 deletions
--- a/benches/bench.rs
+++ b/benches/bench.rs
@@ -111,6 +111,21 @@ fn sin_benchmarks(group: &mut BenchmarkGroup<WallTime>, x_f64: &[f64], x_f32: &[
    });
 }

+fn tan_benchmarks(group: &mut BenchmarkGroup<WallTime>, x_f64: &[f64], x_f32: &[f32]) { // registers four tan benchmarks on `group`: `fast_tan` vs. `exact::*::tan`, for the f64 and f32 inputs
+    group.bench_function("f64_fast", |b| {
+        b.iter(|| x_f64.iter().map(|&x| black_box(x).fast_tan()).collect::<Vec<f64>>()) // each input goes through black_box before the call; results are collected into a Vec
+    });
+    group.bench_function("f64_builtin", |b| {
+        b.iter(|| x_f64.iter().map(|&x| exact::f64::tan(black_box(x))).collect::<Vec<f64>>()) // reference run over the same f64 inputs
+    });
+    group.bench_function("f32_fast", |b| {
+        b.iter(|| x_f32.iter().map(|&x| black_box(x).fast_tan()).collect::<Vec<f32>>()) // f32 counterpart of "f64_fast"
+    });
+    group.bench_function("f32_builtin", |b| {
+        b.iter(|| x_f32.iter().map(|&x| exact::f32::tan(black_box(x))).collect::<Vec<f32>>()) // f32 counterpart of "f64_builtin"
+    });
+}
+
 fn sigmoid_benchmarks(group: &mut BenchmarkGroup<WallTime>, x_f64: &[f64], x_f32: &[f32]) {
    group.bench_function("f64_fast", |b| {
        b.iter(|| x_f64.iter().map(|&x| black_box(x).fast_sigmoid()).collect::<Vec<f64>>())
@@ -147,6 +162,10 @@ fn criterion_benchmark(c: &mut Criterion) {
    sin_benchmarks(&mut group, &X_F64, &X_F32);
    group.finish();

+    let mut group = c.benchmark_group("tan");
+    tan_benchmarks(&mut group, &X_F64, &X_F32);
+    group.finish();
+
    let mut group = c.benchmark_group("sigmoid");
    sigmoid_benchmarks(&mut group, &X_F64, &X_F32);
    group.finish();
--- a/src/fastmath.rs
+++ b/src/fastmath.rs
@@ -7,6 +7,7 @@ use crate::lookup::{
    EndoCosLookupTable, EndoSinLookupTable,
    ToIterator, ToParallelIterator,
 };
+use rayon::prelude::*;

 const SIN_LOOKUP_F32: EndoSinLookupTable<f32> = EndoSinLookupTable::<f32>::new();
 const SIN_LOOKUP_F64: EndoSinLookupTable<f64> = EndoSinLookupTable::<f64>::new();
@@ -138,7 +139,7 @@ impl FastSin for f64 {
    }
 }

-pub trait FastTan { // tan(x) = sin(x) / cos(x)
+pub trait FastTan { // fast approximate tangent; the impls below divide a sine approximation by a cosine approximation
    fn fast_tan(self: Self) -> Self;
 }
 impl FastTan for f32 {
@@ -150,21 +151,24 @@ impl FastTan for f32 {
        let qpprox_sin = 
            1.0 - f32_consts::FRAC_2_PI *
            ((((self + f32_consts::FRAC_PI_2).abs()) % f32_consts::TAU) - f32_consts::PI).abs();
-        ((qpprox_sin * (1.0 + f32_consts::FRAC_PI_6)) - (qpprox_sin.powi(3) * f32_consts::FRAC_PI_6)) /
-        ((qpprox_cos * (1.0 + f32_consts::FRAC_PI_6)) - (qpprox_cos.powi(3) * f32_consts::FRAC_PI_6))
+        let qpprox_sin_fpi6 = qpprox_sin * f32_consts::FRAC_PI_6;
+        let qpprox_cos_fpi6 = qpprox_cos * f32_consts::FRAC_PI_6;
+        ((qpprox_sin + qpprox_sin_fpi6) - (qpprox_sin * qpprox_sin * qpprox_sin_fpi6)) /
+        ((qpprox_cos + qpprox_cos_fpi6) - (qpprox_cos * qpprox_cos * qpprox_cos_fpi6))
    }
 }
 impl FastTan for f64 {
    #[inline]
    fn fast_tan(self: Self) -> f64 {
-        let qpprox_cos = 
-            1.0 - f64_consts::FRAC_2_PI *
-            ((((self + f64_consts::PI).abs()) % f64_consts::TAU) - f64_consts::PI).abs();
-        let qpprox_sin = 
-            1.0 - f64_consts::FRAC_2_PI *
-            ((((self + f64_consts::FRAC_PI_2).abs()) % f64_consts::TAU) - f64_consts::PI).abs();
-        ((qpprox_sin * (1.0 + f64_consts::FRAC_PI_6)) - (qpprox_sin.powi(3) * f64_consts::FRAC_PI_6)) /
-        ((qpprox_cos * (1.0 + f64_consts::FRAC_PI_6)) - (qpprox_cos.powi(3) * f64_consts::FRAC_PI_6))
+        let [sin_self, cos_self] = [f64_consts::FRAC_PI_2, f64_consts::PI].map( // phase shifts in destructuring order: pi/2 for sine, pi for cosine
+            |shift: f64| -> f64 {
+                let shifted = self + shift;
+                let qpprox = 1.0 - (f64_consts::FRAC_2_PI * ((shifted.abs() % f64_consts::TAU) - f64_consts::PI).abs()); // triangle wave in [-1, 1] with period 2*pi
+                let qpprox_fpi6 = qpprox * f64_consts::FRAC_PI_6;
+                (qpprox + qpprox_fpi6) - (qpprox * qpprox * qpprox_fpi6) // cubic shaping: q*(1 + pi/6) - q^3*(pi/6)
+            }
+        );
+        sin_self / cos_self // tan(x) = sin(x) / cos(x)
    }
 }