Final performance optimisations for bhmie

This commit is contained in:
2024-02-23 14:07:45 +00:00
parent 9f5bc32711
commit b595fa0fb5
6 changed files with 105 additions and 74 deletions
-4
View File
@@ -1,4 +0,0 @@
[deps]
AirspeedVelocity = "1c8270ee-6884-45cc-9545-60fa71ec23e4"
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+7 -6
View File
@@ -3,15 +3,16 @@ module Benchmarks
include("../anchors.jl")
include("ffi_wraps.jl")
import .Anchors.ROOT_DIR
import .Anchors.SRC_DIR
import .FFIWraps: bhmie_c, bhmie_fortran, bhmie_fortran77
using BenchmarkTools
using InteractiveUtils #! DEBUG
include("$ROOT_DIR/src/miemfp.jl")
include("$SRC_DIR/miemfp.jl")
function bench_vs_ffi()
# Fixed testing values
nang = UInt32(2) # Example number of angles
nang = 2 # Example number of angles
c_result = @benchmark bhmie_c(x, cxref, nang, cxs1, cxs2) setup=(
x = rand(Float32);
@@ -36,11 +37,11 @@ function bench_vs_ffi()
cxs1 = rand(ComplexF32, $nang);
cxs2 = rand(ComplexF32, $nang);
)
j_result = @benchmark miemfp.bhmie(Float64(x), ComplexF64(cxref), nang) setup=(
j_result = @benchmark miemfp.bhmie(Float64(x), ComplexF64(cxref), Int64(nang)) setup=(
x = rand(Float32);
cxref = rand(ComplexF32);
nang = UInt32($nang);
nang = $nang;
)
println("\nC Implementation")
+1 -2
View File
@@ -1,7 +1,6 @@
using Test
using Random
using PropCheck
using Debugger
using PyCall
if !@isdefined TestUtils
@@ -25,7 +24,7 @@ miemfp.bhmie(
nang::Int64,
s1::Vector{ComplexF64},
s2::Vector{ComplexF64},
) = miemfp.bhmie(x, cxref, UInt32(nang))
) = miemfp.bhmie(x, cxref, nang)
function miemfp.bhmie(
x::Float64,
+2 -2
View File
@@ -22,9 +22,9 @@ def compare_bhmie_functions(
# This is to ensure that only one instance of each function is running at a time
# to avoid memory issues in the FFI code
await event1.wait()
f1_result = f1(x, cxref, 2, cxs1, cxs2)[:2]
f1_result = f1(x, cxref, 2, cxs1, cxs2)[:2] # Only testing at nang = 2 to avoid memory issues
await event2.wait()
f2_result = f2(x, cxref, 2, cxs1, cxs2)[:2]
f2_result = f2(x, cxref, 2, cxs1, cxs2)[:2] # Only testing at nang = 2 to avoid memory issues
return np.all(np.isclose(f1_result, f2_result))