Files
read_aconity_layers/src/rust_fn/mod.rs
Cian Hughes a1150e1ef6 Updated package to use most recent maturin version
Significant rewrite to allow this module to work with the most recent
version of maturin. Required dependency removal for ndarray-csv and
rewrite of tests. Should future-proof for migrations to python3.13 in
software using this library.
2024-10-25 09:54:01 +01:00

400 lines
13 KiB
Rust

use csv::ReaderBuilder;
use glob::glob;
use indicatif::ProgressBar;
use ndarray::{concatenate, Array2, ArrayView2, Axis, Slice};
use rayon::prelude::*;
use std::fs::File;
use std::path::{Path, PathBuf};
use thiserror::Error;
#[derive(Error, Debug)]
pub enum ReadError {
#[error("Glob error: {0}")]
Glob(#[from] glob::GlobError),
#[error("Glob pattern error: {0}")]
GlobPattern(#[from] glob::PatternError),
#[error("IO error: {0}")]
Io(#[from] std::io::Error),
#[error("CSV error: {0}")]
CSV(#[from] csv::Error),
#[error("Float parse error: {0}")]
ParseFloatError(#[from] std::num::ParseFloatError),
#[error("Miscellaneous Error")]
MiscError(String),
}
/// Crate-local result alias using [`ReadError`] as the error type.
pub type Result<T> = std::result::Result<T, ReadError>;
/// Reads every `*.pcd` file in `folder`, sorted by the z value parsed from
/// each file stem, and stacks them into a single array.
///
/// Output columns: corrected x, corrected y, z, then the remaining two data
/// columns from the source files.
///
/// # Errors
/// Returns a [`ReadError`] if globbing fails.
///
/// # Panics
/// Panics (via `expect`/`unwrap`) if a filename cannot be parsed as a float
/// or an individual file fails to read — matching the behaviour existing
/// callers rely on.
pub fn read_layers(folder: &str) -> Result<Array2<f64>> {
    let glob_string: String = folder.to_owned() + "/*.pcd";
    let mut paths = glob(glob_string.as_str())?
        .collect::<std::result::Result<Vec<PathBuf>, glob::GlobError>>()?;
    // Sort layers bottom-to-top by the z value encoded in the filename.
    paths.par_sort_unstable_by(|a, b| {
        let az = get_z(a).expect("Filename parsing failed.");
        let bz = get_z(b).expect("Filename parsing failed.");
        az.partial_cmp(&bz).expect("Filename sorting failed")
    });
    let len: usize = paths.len();
    let bar = ProgressBar::new(len as u64);
    let mut arrays: Vec<Array2<f64>> = vec![Array2::<f64>::zeros((0, 0)); len];
    let mut z_vals: Vec<f64> = vec![0.; len];
    let mut z_lens: Vec<usize> = vec![0; len];
    // Read all files in parallel; each output slot is written exactly once.
    paths
        .par_iter()
        .zip(arrays.par_iter_mut())
        .zip(z_vals.par_iter_mut())
        .zip(z_lens.par_iter_mut())
        .for_each(
            |(((filepath, array_element), z_vals_element), z_lens_element)| {
                let (array, z, z_len) = read_file(filepath.to_path_buf()).unwrap();
                *array_element = array;
                *z_vals_element = z;
                *z_lens_element = z_len;
                bar.inc(1)
            },
        );
    // One constant-z column per layer, spliced between the data columns below.
    let mut padding_arrays: Vec<Array2<f64>> = Vec::new();
    for (z, z_len) in z_vals.iter().zip(z_lens) {
        padding_arrays.push(Array2::from_elem((z_len, 1), *z));
    }
    let padding_array_views: Vec<ArrayView2<f64>> =
        padding_arrays.iter().map(|x| x.view()).collect();
    let array_views: Vec<ArrayView2<f64>> = arrays.iter().map(|x| x.view()).collect();
    // Stack all layers ONCE — previously this concatenation was performed
    // twice (once per slice), doubling the work and peak memory.
    let stacked = concatenate(Axis(0), &array_views).unwrap();
    let mut out_array = concatenate(
        Axis(1),
        &[
            stacked.slice_axis(Axis(1), Slice::from(0..2)),
            concatenate(Axis(0), &padding_array_views).unwrap().view(),
            stacked.slice_axis(Axis(1), Slice::from(2..4)),
        ],
    )
    .unwrap();
    // Apply the coordinate corrections to the x and y columns in place.
    out_array.column_mut(0).par_map_inplace(correct_x);
    out_array.column_mut(1).par_map_inplace(correct_y);
    Ok(out_array)
}
/// Reads the given `.pcd` files (in the order supplied) and stacks them into
/// a single array with columns: corrected x, corrected y, z, then the
/// remaining two data columns from the source files.
///
/// Unlike [`read_layers`] this performs no globbing or sorting; the caller
/// controls the file order.
///
/// # Panics
/// Panics (via `unwrap`) if any individual file fails to read — matching the
/// behaviour existing callers rely on.
pub fn read_selected_layers(file_list: Vec<PathBuf>) -> Result<Array2<f64>> {
    let len: usize = file_list.len();
    let bar = ProgressBar::new(len as u64);
    let mut arrays: Vec<Array2<f64>> = vec![Array2::<f64>::zeros((0, 0)); len];
    let mut z_vals: Vec<f64> = vec![0.; len];
    let mut z_lens: Vec<usize> = vec![0; len];
    // Read all files in parallel; each output slot is written exactly once.
    file_list
        .par_iter()
        .zip(arrays.par_iter_mut())
        .zip(z_vals.par_iter_mut())
        .zip(z_lens.par_iter_mut())
        .for_each(
            |(((filepath, array_element), z_vals_element), z_lens_element)| {
                let (array, z, z_len) = read_file(filepath.to_path_buf()).unwrap();
                *array_element = array;
                *z_vals_element = z;
                *z_lens_element = z_len;
                bar.inc(1)
            },
        );
    // One constant-z column per layer, spliced between the data columns below.
    let mut padding_arrays: Vec<Array2<f64>> = Vec::new();
    for (z, z_len) in z_vals.iter().zip(z_lens) {
        padding_arrays.push(Array2::from_elem((z_len, 1), *z));
    }
    let padding_array_views: Vec<ArrayView2<f64>> =
        padding_arrays.iter().map(|x| x.view()).collect();
    let array_views: Vec<ArrayView2<f64>> = arrays.iter().map(|x| x.view()).collect();
    // Stack all layers ONCE — previously this concatenation was performed
    // twice (once per slice), doubling the work and peak memory.
    let stacked = concatenate(Axis(0), &array_views).unwrap();
    let mut out_array = concatenate(
        Axis(1),
        &[
            stacked.slice_axis(Axis(1), Slice::from(0..2)),
            concatenate(Axis(0), &padding_array_views).unwrap().view(),
            stacked.slice_axis(Axis(1), Slice::from(2..4)),
        ],
    )
    .unwrap();
    // Apply the coordinate corrections to the x and y columns in place.
    out_array.column_mut(0).par_map_inplace(correct_x);
    out_array.column_mut(1).par_map_inplace(correct_y);
    Ok(out_array)
}
/// Reads a single `.pcd` layer file, appends its z value (parsed from the
/// file stem) as a trailing column, and applies the x/y corrections.
///
/// NOTE(review): the z column is appended at the END here, whereas
/// [`read_layers`] splices it between columns 2 and 3 — confirm callers
/// expect this layout difference.
///
/// # Errors
/// Propagates any [`ReadError`] from reading or parsing the file.
/// (Previously this function panicked on a read failure via `unwrap`
/// despite returning `Result`.)
pub fn read_layer(file: &str) -> Result<Array2<f64>> {
    let (array, z, z_len) = read_file(Path::new(file).to_path_buf())?;
    let z_array: Array2<f64> = Array2::from_elem((z_len, 1), z);
    let mut out_array = concatenate(Axis(1), &[array.view(), z_array.view()]).unwrap();
    // Apply the coordinate corrections to the x and y columns in place.
    out_array.column_mut(0).par_map_inplace(correct_x);
    out_array.column_mut(1).par_map_inplace(correct_y);
    Ok(out_array)
}
/// Parses a `.pcd` file (headerless CSV of floats) into a 2-D array.
///
/// Returns `(data, z, n_rows)` where `z` is parsed from the file stem via
/// [`get_z`] and `n_rows` is the number of data rows read.
///
/// # Errors
/// Returns a [`ReadError`] on IO, CSV, or float-parse failure, or when the
/// filename cannot be interpreted as a z value.
pub fn read_file(filepath: PathBuf) -> Result<(Array2<f64>, f64, usize)> {
    let z: f64 = get_z(&filepath)?;
    let file = File::open(filepath)?;
    let mut rdr = ReaderBuilder::new().has_headers(false).from_reader(file);
    let data = rdr
        .records()
        .collect::<std::result::Result<Vec<csv::StringRecord>, _>>()?
        .iter()
        .map(|record| {
            record
                .iter()
                .map(|field| field.parse::<f64>().map_err(ReadError::from))
                .collect::<Result<Vec<f64>>>()
        })
        .collect::<Result<Vec<_>>>()?;
    let length = data.len();
    // Guard against an empty file — previously `data[0]` panicked here.
    // WARNING: rows are still assumed to share the first row's width.
    let width = data.first().map_or(0, Vec::len);
    let mut arr: Array2<f64> = Array2::zeros((length, width));
    for (data_row, mut arr_row) in data.iter().zip(arr.axis_iter_mut(Axis(0))) {
        for (data_i, arr_i) in data_row.iter().zip(arr_row.iter_mut()) {
            *arr_i = *data_i;
        }
    }
    Ok((arr, z, length))
}
/// Parses the z-height of a layer from its filename: the file stem (name
/// without extension) must itself be a float, e.g. `"12.5.pcd"` -> `12.5`.
///
/// # Errors
/// Returns [`ReadError::MiscError`] when the path has no file stem or the
/// stem is not valid UTF-8, and [`ReadError::ParseFloatError`] when the stem
/// does not parse as a float.
pub fn get_z(filepath: &Path) -> Result<f64> {
    // `ok_or_else` so the error message is only built on the failure path
    // (the previous `ok_or(format!(...))` allocated on every call).
    let stem = filepath.file_stem().ok_or_else(|| {
        ReadError::MiscError(format!(
            "No file stem found for file {}",
            filepath.display()
        ))
    })?;
    let stem = stem
        .to_str()
        .ok_or_else(|| ReadError::MiscError("Failed to parse filename".to_owned()))?;
    Ok(stem.parse::<f64>()?)
}
/// Corrects a raw x coordinate in place: offset, scale, shift, then divide
/// and flip the sign. The operation sequence matches the calibration applied
/// to column 0 by the layer readers.
pub fn correct_x(x: &mut f64) {
    let scaled = (*x + 16384.) * 0.009155273;
    let shifted = scaled - 87.;
    *x = -(shifted / 1.01);
}
/// Corrects a raw y coordinate in place: offset, scale, shift, then divide.
/// Same shape as [`correct_x`] but with a different shift/divisor and no
/// sign flip; applied to column 1 by the layer readers.
pub fn correct_y(y: &mut f64) {
    let scaled = (*y + 16384.) * 0.009155273;
    *y = (scaled - 91.) / 1.02;
}
// Unit, property-based (arbitrary/arbtest), and regression tests for rust_fn.
// Regression baselines are flexbuffers-serialized arrays stored inside an
// xz-compressed tar archive shipped with the repository.
#[cfg(test)]
mod tests {
use crate::rust_fn;
use anyhow::{Error, Result};
use approx::{AbsDiffEq, UlpsEq};
use arbitrary::{Arbitrary, Result as ArbResult, Unstructured};
use arbtest::arbtest;
use flexbuffers::Reader;
use float_cmp::{ApproxEq, F64Margin};
use ndarray::{Array, Array1, Array2, Zip};
use serde::Deserialize;
use std::fs::File;
use std::io::{
BufWriter, Error as ioError, ErrorKind as ioErrorKind, Read, Result as ioResult, Write,
};
use std::path::PathBuf;
use tar::Archive;
use tempfile::{tempdir, TempDir};
use xz::read::XzDecoder;
// Archive containing the flexbuffers regression baselines.
const TEST_DATA_FILE: &str = "tests/data.tar.xz";
// Arbitrary f64 values are divided by this so the generated z fits in a
// filename without overflow when formatted with fixed precision.
const Z_SCALING: f64 = f64::MAX / 100.0;
// A randomly generated layer: a z-height plus an integer data array.
// Integers are used so writing to disk and re-reading is lossless.
#[derive(Debug)]
struct ExampleData {
z: f64,
ar: Array2<isize>,
}
impl<'a> Arbitrary<'a> for ExampleData {
fn arbitrary(u: &mut Unstructured<'a>) -> ArbResult<Self> {
// NOTE: We need to add 1 here, if len == 0 the test will fail
let len = 1 + u
.arbitrary_len::<(isize, isize, isize, isize)>()?
.min(usize::MAX - 1);
let ar =
Array2::from_shape_simple_fn((4, len), || isize::arbitrary(u).unwrap_or_default());
// This is a clunky way to account for truncation of floats when writing filepath
let z = format!("{:.32}", f64::arbitrary(u)?.abs() / Z_SCALING)
.parse()
.map_err(|_| arbitrary::Error::IncorrectFormat)?;
Ok(ExampleData { z, ar })
}
}
// What read_file should produce for a given ExampleData: the same values,
// but as f64 (read_file parses everything as floats).
#[derive(Debug)]
struct ReadResult {
z: f64,
ar: Array2<f64>,
}
impl From<ExampleData> for ReadResult {
fn from(value: ExampleData) -> ReadResult {
ReadResult {
z: value.z,
ar: value.ar.mapv(|x| x as f64),
}
}
}
// Approximate equality for ReadResult: z and every array element must be
// within the float margin, and shapes must match exactly.
impl ApproxEq for ReadResult {
type Margin = F64Margin;
fn approx_eq<T: Into<F64Margin>>(self, other: Self, margin: T) -> bool {
let margin = margin.into();
if !self.z.approx_eq(other.z, margin) {
return false;
} else if self.ar.shape() != other.ar.shape() {
return false;
} else {
// Fold element-wise comparison into a single boolean.
Zip::from(&self.ar)
.and(&other.ar)
.fold(true, |acc, &a, &b| acc && a.approx_eq(b, margin))
}
}
}
// Writes `array` to `file` one row per line, values space-separated.
// NOTE(review): rust_fn::read_file uses the csv crate's default (comma)
// delimiter — confirm this matches the intended .pcd field separator.
fn write_array_to_pcd(file: File, array: &Array2<isize>) -> ioResult<()> {
let mut filebuf = BufWriter::new(file);
for row in array.rows().into_iter() {
let mut row_string = String::new();
for &item in row {
row_string.push_str(&format!("{} ", item));
}
row_string.pop(); // Remove the last space
writeln!(filebuf, "{}", row_string)?;
}
Ok(())
}
// Creates a temporary "<z>.pcd" file containing `ar`, returning the temp
// dir handle (keeps the dir alive) and the file's path.
fn create_test_pcd(z: f64, ar: &Array2<isize>) -> ioResult<(TempDir, PathBuf)> {
let tmpd = tempdir()?;
let tmpfpath = tmpd.path().join(format!("{:.32}.pcd", z));
let tmpf = File::create(tmpfpath.clone())?;
write_array_to_pcd(tmpf, ar)?;
// tmpd needs to be returned or else it gets deleted when it goes out of scope
Ok((tmpd, tmpfpath))
}
// Property test: round-tripping arbitrary data through a .pcd file and
// read_file reproduces the original values (within float margin).
#[test]
fn proptest_read_file() {
fn prop(u: &mut Unstructured<'_>) -> ArbResult<()> {
let data = ExampleData::arbitrary(u)?;
let (_tmpdir, tmpfpath) = create_test_pcd(data.z, &data.ar)
.map_err(|_e| arbitrary::Error::IncorrectFormat)?;
let (ar_out, z_out, _) =
rust_fn::read_file(tmpfpath).map_err(|_e| arbitrary::Error::IncorrectFormat)?;
let actual_result = ReadResult {
z: z_out,
ar: ar_out,
};
let example_result: ReadResult = data.into();
assert!(example_result.approx_eq(actual_result, F64Margin::default()));
Ok(())
}
arbtest(prop);
}
// Extracts the named entry from the regression-data archive and
// deserializes it from flexbuffers into a 1-D float array.
fn unpack_test_data(test_name: &str) -> Result<Array1<f64>> {
let compressed_file = File::open(TEST_DATA_FILE)?;
let decompressor = XzDecoder::new(compressed_file);
let mut archive = Archive::new(decompressor);
let mut buffer = Vec::new();
for maybe_entry in archive.entries()? {
let mut entry = maybe_entry?;
let path = entry.path()?;
let path_str = path.to_string_lossy();
if path_str == test_name {
entry.read_to_end(&mut buffer)?;
break;
}
}
if buffer.len() > 0 {
let flexbuf = Reader::get_root(buffer.as_slice())?;
let ar: Array1<f64> = Array1::deserialize(flexbuf)?;
Ok(ar)
} else {
// An empty buffer means the entry was never found in the archive.
let err = ioError::new(
ioErrorKind::NotFound,
format!("Entry {} not found in {}", test_name, TEST_DATA_FILE),
);
Err(Error::new(err))
}
}
// Property test: correct_x must not panic for any finite or non-finite f64.
#[test]
fn proptest_correct_x() {
fn prop(u: &mut Unstructured<'_>) -> ArbResult<()> {
let mut input = f64::arbitrary(u)?;
let output = rust_fn::correct_x(&mut input);
Ok(output)
}
arbtest(prop);
}
// Regression test: correct_x output over a fixed range must match the
// stored baseline to within ULP tolerance.
#[test]
fn regrtest_correct_x() {
let true_x = unpack_test_data("correct_x_out.flex").unwrap();
let mut test_x = Array::range(-1000000., 1000000., 1.);
test_x.par_map_inplace(rust_fn::correct_x);
assert!(true_x
.iter()
.zip(test_x.iter())
.all(|(&x, y): (&f64, &f64)| -> bool {
x.ulps_eq(y, f64::default_epsilon(), f64::default_max_ulps())
}));
}
// Property test: correct_y must not panic for any f64 input.
#[test]
fn proptest_correct_y() {
fn prop(u: &mut Unstructured<'_>) -> ArbResult<()> {
let mut input = f64::arbitrary(u)?;
let output = rust_fn::correct_y(&mut input);
Ok(output)
}
arbtest(prop);
}
// Regression test: correct_y output over a fixed range must match the
// stored baseline to within ULP tolerance.
#[test]
fn regrtest_correct_y() {
let true_y = unpack_test_data("correct_y_out.flex").unwrap();
let mut test_y = Array::range(-1000000., 1000000., 1.);
test_y.par_map_inplace(rust_fn::correct_y);
assert!(true_y
.iter()
.zip(test_y.iter())
.all(|(&x, y): (&f64, &f64)| -> bool {
x.ulps_eq(y, f64::default_epsilon(), f64::default_max_ulps())
}));
}
}