From ea204b562e87207158efd551c955afbb4fd2e3a0 Mon Sep 17 00:00:00 2001 From: Cian Hughes Date: Mon, 13 Nov 2023 17:54:27 +0000 Subject: [PATCH] Minor optimizations --- benches/bench.rs | 19 +++++++++++++++++++ src/fastmath.rs | 26 +++++++++++++++----------- 2 files changed, 34 insertions(+), 11 deletions(-) diff --git a/benches/bench.rs b/benches/bench.rs index 2556a52..d98bf20 100644 --- a/benches/bench.rs +++ b/benches/bench.rs @@ -111,6 +111,21 @@ fn sin_benchmarks(group: &mut BenchmarkGroup, x_f64: &[f64], x_f32: &[ }); } +fn tan_benchmarks(group: &mut BenchmarkGroup, x_f64: &[f64], x_f32: &[f32]) { + group.bench_function("f64_fast", |b| { + b.iter(|| x_f64.iter().map(|&x| black_box(x).fast_tan()).collect::>()) + }); + group.bench_function("f64_builtin", |b| { + b.iter(|| x_f64.iter().map(|&x| exact::f64::tan(black_box(x))).collect::>()) + }); + group.bench_function("f32_fast", |b| { + b.iter(|| x_f32.iter().map(|&x| black_box(x).fast_tan()).collect::>()) + }); + group.bench_function("f32_builtin", |b| { + b.iter(|| x_f32.iter().map(|&x| exact::f32::tan(black_box(x))).collect::>()) + }); +} + fn sigmoid_benchmarks(group: &mut BenchmarkGroup, x_f64: &[f64], x_f32: &[f32]) { group.bench_function("f64_fast", |b| { b.iter(|| x_f64.iter().map(|&x| black_box(x).fast_sigmoid()).collect::>()) @@ -147,6 +162,10 @@ fn criterion_benchmark(c: &mut Criterion) { sin_benchmarks(&mut group, &X_F64, &X_F32); group.finish(); + let mut group = c.benchmark_group("tan"); + tan_benchmarks(&mut group, &X_F64, &X_F32); + group.finish(); + let mut group = c.benchmark_group("sigmoid"); sigmoid_benchmarks(&mut group, &X_F64, &X_F32); group.finish(); diff --git a/src/fastmath.rs b/src/fastmath.rs index d4171f4..35515aa 100644 --- a/src/fastmath.rs +++ b/src/fastmath.rs @@ -7,6 +7,7 @@ use crate::lookup::{ EndoCosLookupTable, EndoSinLookupTable, ToIterator, ToParallelIterator, }; +use rayon::prelude::*; const SIN_LOOKUP_F32: EndoSinLookupTable = EndoSinLookupTable::::new(); const 
SIN_LOOKUP_F64: EndoSinLookupTable = EndoSinLookupTable::::new(); @@ -138,7 +139,7 @@ impl FastSin for f64 { } } -pub trait FastTan { // tan(x) = sin(x) / cos(x) +pub trait FastTan { fn fast_tan(self: Self) -> Self; } impl FastTan for f32 { @@ -150,21 +151,24 @@ impl FastTan for f32 { let qpprox_sin = 1.0 - f32_consts::FRAC_2_PI * ((((self + f32_consts::FRAC_PI_2).abs()) % f32_consts::TAU) - f32_consts::PI).abs(); - ((qpprox_sin * (1.0 + f32_consts::FRAC_PI_6)) - (qpprox_sin.powi(3) * f32_consts::FRAC_PI_6)) / - ((qpprox_cos * (1.0 + f32_consts::FRAC_PI_6)) - (qpprox_cos.powi(3) * f32_consts::FRAC_PI_6)) + let qpprox_sin_fpi6 = qpprox_sin * f32_consts::FRAC_PI_6; + let qpprox_cos_fpi6 = qpprox_cos * f32_consts::FRAC_PI_6; + ((qpprox_sin + qpprox_sin_fpi6) - (qpprox_sin * qpprox_sin * qpprox_sin_fpi6)) / + ((qpprox_cos + qpprox_cos_fpi6) - (qpprox_cos * qpprox_cos * qpprox_cos_fpi6)) } } impl FastTan for f64 { #[inline] fn fast_tan(self: Self) -> f64 { - let qpprox_cos = - 1.0 - f64_consts::FRAC_2_PI * - ((((self + f64_consts::PI).abs()) % f64_consts::TAU) - f64_consts::PI).abs(); - let qpprox_sin = - 1.0 - f64_consts::FRAC_2_PI * - ((((self + f64_consts::FRAC_PI_2).abs()) % f64_consts::TAU) - f64_consts::PI).abs(); - ((qpprox_sin * (1.0 + f64_consts::FRAC_PI_6)) - (qpprox_sin.powi(3) * f64_consts::FRAC_PI_6)) / - ((qpprox_cos * (1.0 + f64_consts::FRAC_PI_6)) - (qpprox_cos.powi(3) * f64_consts::FRAC_PI_6)) + let [sin_self, cos_self] = [f64_consts::FRAC_PI_2, f64_consts::PI].map( /* phase shifts, in output order: FRAC_PI_2 gives the sine term, PI gives the cosine term (the same shifts the removed per-term code used) */ + |shift: f64| -> f64 { + let shifted = self + shift; + let qpprox = 1.0 - (f64_consts::FRAC_2_PI * ((shifted.abs() % f64_consts::TAU) - f64_consts::PI).abs()); + let qpprox_fpi6 = qpprox * f64_consts::FRAC_PI_6; + (qpprox + qpprox_fpi6) - (qpprox * qpprox * qpprox_fpi6) + } + ); + sin_self / cos_self } }