Final performance optimisations for bhmie

2026-06-11 01:04:57 +01:00 · 2024-02-23 14:07:45 +00:00
parent 9f5bc32711
commit b595fa0fb5
6 changed files with 105 additions and 74 deletions
@@ -1,4 +0,0 @@
-[deps]
-AirspeedVelocity = "1c8270ee-6884-45cc-9545-60fa71ec23e4"
-BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
-Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
@@ -3,15 +3,16 @@ module Benchmarks
 include("../anchors.jl")
 include("ffi_wraps.jl")

-import .Anchors.ROOT_DIR
+import .Anchors.SRC_DIR
 import .FFIWraps: bhmie_c, bhmie_fortran, bhmie_fortran77
 using BenchmarkTools
+using InteractiveUtils #! DEBUG

-include("$ROOT_DIR/src/miemfp.jl")
+include("$SRC_DIR/miemfp.jl")

 function bench_vs_ffi()
    # Fixed testing values
-    nang = UInt32(2)  # Example number of angles
+    nang = 2  # Example number of angles

    c_result = @benchmark bhmie_c(x, cxref, nang, cxs1, cxs2) setup=(
        x = rand(Float32);
@@ -36,11 +37,11 @@ function bench_vs_ffi()
        cxs1 = rand(ComplexF32, $nang);
        cxs2 = rand(ComplexF32, $nang);
    )
-
-    j_result = @benchmark miemfp.bhmie(Float64(x), ComplexF64(cxref), nang) setup=(
+    
+    j_result = @benchmark miemfp.bhmie(Float64(x), ComplexF64(cxref), Int64(nang)) setup=(
        x = rand(Float32);
        cxref = rand(ComplexF32);
-        nang = UInt32($nang);
+        nang = $nang;
    )

    println("\nC Implementation")
@@ -1,7 +1,6 @@
 using Test
 using Random
 using PropCheck
-using Debugger
 using PyCall

 if !@isdefined TestUtils
@@ -25,7 +24,7 @@ miemfp.bhmie(
    nang::Int64,
    s1::Vector{ComplexF64},
    s2::Vector{ComplexF64},
-) = miemfp.bhmie(x, cxref, UInt32(nang))
+) = miemfp.bhmie(x, cxref, nang)

 function miemfp.bhmie(
    x::Float64,
@@ -22,9 +22,9 @@ def compare_bhmie_functions(
        # This is to ensure that only one instance of each function is running at a time
        # to avoid memory issues in the FFI code
        await event1.wait()
-        f1_result = f1(x, cxref, 2, cxs1, cxs2)[:2]
+        f1_result = f1(x, cxref, 2, cxs1, cxs2)[:2]  # Only testing at nang = 2 to avoid memory issues
        await event2.wait()
-        f2_result = f2(x, cxref, 2, cxs1, cxs2)[:2]
+        f2_result = f2(x, cxref, 2, cxs1, cxs2)[:2]  # Only testing at nang = 2 to avoid memory issues

        return np.all(np.isclose(f1_result, f2_result))