mirror of https://github.com/Cian-H/Aconity_ML_Expt1.git, synced 2025-12-23 05:01:58 +00:00
Initial Commit
360
act_plot.ipynb
Normal file
File diff suppressed because one or more lines are too long
274
custom_activations.py
Normal file
@@ -0,0 +1,274 @@
import torch
from torch import nn


class SoftExp(nn.Module):
    """
    Implementation of the soft exponential activation.

    Shape:
        - Input: (N, *) where * means any number of additional
          dimensions
        - Output: (N, *), same shape as the input

    Parameters:
        - alpha - trainable parameter

    References:
        - See related paper:
          https://arxiv.org/pdf/1602.01321.pdf

    Examples:
        >>> a1 = SoftExp()
        >>> x = torch.randn(256)
        >>> x = a1(x)
    """

    def __init__(self, alpha=None, beta=None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if "device" in kwargs:
            self.device = kwargs["device"]
        else:
            self.device = torch.device("cpu")
        alpha = (
            torch.tensor(alpha) if alpha is not None else nn.Parameter(torch.randn(1))
        )
        self.alpha = alpha.to(self.device)
        self.alpha.requires_grad = True  # ensure alpha is trainable
        self.__name__ = "SoftExp"

    def forward(self, x):
        """
        Forward pass of the function.
        Applies the function to the input elementwise.
        """
        if self.alpha == 0.0:
            return x

        if self.alpha < 0.0:
            return -torch.log(1 - self.alpha * (x + self.alpha)) / self.alpha

        if self.alpha > 0.0:
            return (torch.exp(self.alpha * x) - 1) / self.alpha + self.alpha


@torch.jit.script
def sech(x):
    return 1 / torch.cosh(x)


@torch.jit.script
def dip(x):
    return (-2.0261193218831233 * sech(x)) + 0.31303528549933146


@torch.jit.script
def bmu(x):
    return torch.where(
        x <= -1,
        -1 / torch.abs(x),
        torch.where(x >= 1, x - 2, dip(x)),
    )


class BMU(nn.Module):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def forward(self, input):
        return bmu(input)


class TrainableHybrid(nn.Module):
    def __init__(
        self, functions, function_args=None, function_kwargs=None, *args, **kwargs
    ):
        super().__init__(*args, **kwargs)
        if function_args is None:
            function_args = [tuple() for _ in functions]
        if function_kwargs is None:
            function_kwargs = [dict() for _ in functions]
        if None in function_args:
            function_args = [
                tuple() if fa is None else fa for fa in function_args
            ]
        if None in function_kwargs:
            function_kwargs = [
                dict() if fk is None else fk for fk in function_kwargs
            ]
        self.functions = [
            f(*fa, **fk) for f, fa, fk in zip(functions, function_args, function_kwargs)
        ]
        self.alpha = nn.Parameter(torch.randn(len(functions)))
        self.normalize_alpha()
        self.__name__ = (
            f"TrainableHybrid{str([f.__name__ for f in functions]).replace(' ', '')}"
        )

    def __repr__(self):
        return self.__name__

    def normalize_alpha(self) -> None:
        self.alpha.data = self.alpha / torch.sum(self.alpha)

    def apply_activations(self, input: torch.Tensor):
        return torch.sum(
            torch.stack(
                [a * f(input) for f, a in zip(self.functions, self.alpha)]
            ),
            dim=0,
        )

    def forward(self, input: torch.Tensor) -> torch.Tensor:
        self.normalize_alpha()
        return self.apply_activations(input)

    def to(self, device):
        # self.functions is a plain list rather than an nn.ModuleList, so the
        # wrapped modules must be moved to the target device manually
        super().to(device)
        self.functions = [f.to(device) for f in self.functions]
        return self


class ISRU(nn.Module):
    def __init__(self, alpha=None, beta=None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if "device" in kwargs:
            self.device = kwargs["device"]
        else:
            self.device = torch.device("cpu")
        alpha = (
            torch.tensor(alpha) if alpha is not None else nn.Parameter(torch.randn(1))
        )
        self.alpha = alpha.to(self.device)
        self.alpha.requires_grad = True
        self.__name__ = "ISRU"

    def forward(self, x):
        return x / torch.sqrt(1 + self.alpha * x**2)


class ISRLU(nn.Module):
    def __init__(self, alpha=1.0, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if "device" in kwargs:
            self.device = kwargs["device"]
        else:
            self.device = torch.device("cpu")
        alpha = (
            torch.tensor(alpha) if alpha is not None else nn.Parameter(torch.randn(1))
        )
        self.alpha = alpha.to(self.device)
        self.alpha.requires_grad = True
        self.isru = ISRU(alpha)
        self.__name__ = "ISRLU"

    def forward(self, x):
        return torch.where(x >= 0, x, self.isru(x))


class PBessel(nn.Module):
    def __init__(self, alpha=None, beta=None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if "device" in kwargs:
            self.device = kwargs["device"]
        else:
            self.device = torch.device("cpu")
        alpha = (
            torch.tensor(alpha) if alpha is not None else nn.Parameter(torch.randn(1))
        )
        beta = torch.tensor(beta) if beta is not None else nn.Parameter(torch.randn(1))
        self.alpha = alpha.to(self.device)
        self.beta = beta.to(self.device)
        self.alpha.requires_grad = True
        self.beta.requires_grad = True
        self.__name__ = "PBessel"

    def forward(self, input):
        gamma = 1 - self.alpha
        return (self.alpha * torch.special.bessel_j0(self.beta * input)) + (
            gamma * torch.special.bessel_j1(self.beta * input)
        )


class LeakyPReQU(nn.Module):
    def __init__(self, alpha=None, beta=None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if "device" in kwargs:
            self.device = kwargs["device"]
        else:
            self.device = torch.device("cpu")
        alpha = (
            torch.tensor(alpha) if alpha is not None else nn.Parameter(torch.randn(1))
        )
        beta = torch.tensor(beta) if beta is not None else nn.Parameter(torch.randn(1))
        self.alpha = alpha.to(self.device)
        self.beta = beta.to(self.device)
        self.alpha.requires_grad = True
        self.beta.requires_grad = True
        self.__name__ = "LeakyPReQU"

    def forward(self, input):
        return torch.where(
            input > 0,
            (self.alpha * input * input) + (self.beta * input),
            self.beta * input,
        )


class Sinusoid(nn.Module):
    def __init__(self, alpha=None, beta=None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if "device" in kwargs:
            self.device = kwargs["device"]
        else:
            self.device = torch.device("cpu")
        alpha = (
            torch.tensor(alpha) if alpha is not None else nn.Parameter(torch.randn(1))
        )
        beta = torch.tensor(beta) if beta is not None else nn.Parameter(torch.randn(1))
        self.alpha = alpha.to(self.device)
        self.beta = beta.to(self.device)
        self.alpha.requires_grad = True
        self.beta.requires_grad = True
        self.__name__ = "Sinusoid"

    def forward(self, input):
        return torch.sin(self.alpha * (input + self.beta))


class Modulo(nn.Module):
    def __init__(self, alpha=None, beta=None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if "device" in kwargs:
            self.device = kwargs["device"]
        else:
            self.device = torch.device("cpu")
        alpha = (
            torch.tensor(alpha) if alpha is not None else nn.Parameter(torch.randn(1))
        )
        beta = torch.tensor(beta) if beta is not None else nn.Parameter(torch.randn(1))
        self.alpha = alpha.to(self.device)
        self.beta = beta.to(self.device)
        self.alpha.requires_grad = True
        self.beta.requires_grad = True
        self.__name__ = "Modulo"

    def forward(self, input):
        return torch.fmod(self.alpha * input, self.beta)


class TriWave(nn.Module):
    def __init__(self, alpha=None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if "device" in kwargs:
            self.device = kwargs["device"]
        else:
            self.device = torch.device("cpu")
        alpha = (
            torch.tensor(alpha) if alpha is not None else nn.Parameter(torch.randn(1))
        )
        self.alpha = alpha.to(self.device)
        self.alpha.requires_grad = True
        self.__name__ = "TriWave"

    def forward(self, input):
        return torch.abs(
            2 * (input / self.alpha - torch.floor(input / self.alpha + 0.5))
        )


class Gaussian(nn.Module):
    def __init__(self, alpha=None, beta=None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if "device" in kwargs:
            self.device = kwargs["device"]
        else:
            self.device = torch.device("cpu")
        alpha = (
            torch.tensor(alpha) if alpha is not None else nn.Parameter(torch.randn(1))
        )
        beta = torch.tensor(beta) if beta is not None else nn.Parameter(torch.randn(1))
        self.alpha = alpha.to(self.device)
        self.beta = beta.to(self.device)
        self.alpha.requires_grad = True
        self.beta.requires_grad = True
        self.__name__ = "Gaussian"

    def forward(self, x):
        return torch.exp(-(((x - self.alpha) ** 2) / (2 * self.beta**2)))
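

# A minimal smoke test (an illustrative addition, not part of the training
# pipeline): every activation should map a tensor elementwise, and
# TrainableHybrid should blend two activations into an output of the same shape.
if __name__ == "__main__":
    x = torch.randn(8)
    for act in (
        SoftExp(), BMU(), ISRU(), ISRLU(), PBessel(),
        LeakyPReQU(), Sinusoid(), Modulo(), TriWave(), Gaussian(),
    ):
        assert act(x).shape == x.shape
    hybrid = TrainableHybrid([BMU, ISRU])
    assert hybrid(x).shape == x.shape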
1012
data_postprocessing.ipynb
Normal file
File diff suppressed because it is too large
1421
expt1.ipynb
Normal file
File diff suppressed because it is too large
547
expt1.py
Normal file
@@ -0,0 +1,547 @@
# %% [markdown]
# <h1>Experiment 1</h1>
# <h3>Initial hyperparameter tuning</h3>
# <p>Summary</p>
# <ul>
# <li>A model was created with a dynamic constructor, allowing for a hyperparameter-driven model</li>
# <li>Hyperparameters were tuned using <code>Optuna</code></li>
# <li>The training loop was constructed using <code>PyTorchLightning</code></li>
# <li>The model was trained on a cluster of machines using a shared SQL trial database</li>
# <li>An extremely aggressive pruning algorithm was used to quickly narrow in on an optimal hyperparameter space</li>
# <li>Experiment 1 was left to train on the cluster for 2 days</li>
# </ul>

# %%
# Data handling imports
from dask.distributed import Client, LocalCluster
import dask
import dask.dataframe as dd
import dask.array as da
import numpy as np
import pickle
import random
from itertools import chain
from tqdm.auto import tqdm

# Deep learning imports
import torch
from torch.utils.data import DataLoader
from torch import nn
from torch.nn import functional as F
from torch import optim
import pytorch_lightning as pl
from torchmetrics import MeanSquaredError
from pytorch_lightning import Trainer
import optuna
from optuna.pruners import HyperbandPruner
from optuna.integration import PyTorchLightningPruningCallback


# Suppress some warning messages from pytorch_lightning;
# it really doesn't like that I've forced it to handle a dask array!
import warnings

warnings.filterwarnings("ignore", category=UserWarning, module=pl.__name__)

# Also, set up a log to record debug messages for failed trials
import logging

logging.basicConfig(filename="debug.log", encoding="utf-8", level=logging.ERROR)
# %% [markdown]
# <h3>Patching PyTorchLightning</h3>
# <p>
# A key part of this project was to develop a patch for PyTorchLightning to allow for the use of <code>dask</code> arrays as inputs. It was important that PyTorchLightning accept <code>dask</code> arrays and only load the data into memory when needed. Otherwise, our extremely large datasets would simply crash our system, as they are significantly larger than the available RAM and VRAM.
# </p><p>
# After several versions of the patch, this final version was developed. It is a simple monkey patch that wraps the <code>pytorch_lightning.utilities.data._extract_batch_size</code> generator with a check that mimics the expected behaviour for torch tensors when given a dask array, and extends its type signature to ensure static analysis is still possible.
# </p><p>
# With this patch applied, the forward method in our model can accept a dask array and only compute each chunk of the array when needed. This allows us to train our model on datasets that are significantly larger than the available memory.
# </p>
# %%
# Monkey patch to allow pytorch lightning to accept a dask array as a model input
from typing import Any, Generator, Iterable, Mapping, Optional, Union

BType = Union[da.Array, torch.Tensor, str, Mapping[Any, "BType"], Iterable["BType"]]

unpatched = pl.utilities.data._extract_batch_size


def patch(batch: BType) -> Generator[Optional[int], None, None]:
    if isinstance(batch, da.core.Array):
        # Mimic the behaviour expected for torch tensors: a 0-d array is a
        # single sample, otherwise the first axis is the batch dimension
        if len(batch.shape) == 0:
            yield 1
        else:
            yield batch.shape[0]
    else:
        yield from unpatched(batch)


pl.utilities.data._extract_batch_size = patch
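# %% [markdown]
# <p>
# As a quick sanity check (an illustrative addition, not part of the original
# training script), the patched generator should report the first axis of a
# dask array as its batch size:
# </p>

# %%
if __name__ == "__main__":
    assert next(patch(da.zeros((8, 4)))) == 8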
# %%
# Set the device to use with torch
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


# Prepare a dask cluster and client
def create_client():
    cluster = LocalCluster(n_workers=2, threads_per_worker=1)
    client = Client(cluster)
    return client


if __name__ == "__main__":
    client = create_client()

# %%
# Load X and y for training
samples = list(range(1, 82))

with open("sample_X.pkl", "rb") as f:
    X = pickle.load(f)

with open("sample_y.pkl", "rb") as f:
    y = pickle.load(f)
# %% [markdown]
# <h3>Dataset Splitting</h3>
# <p>The dataset is split into training and validation sets (an 80:20 split). Because the number of available samples is extremely small, we haven't produced a test dataset. In the future, as more data is obtained, a test set should be included whenever possible.</p>

# %%
# Separate samples into training and validation sets
val_samples = random.sample(samples, k=len(samples) // 5)
train_samples = [s for s in samples if s not in val_samples]

X_train = {i: X[i] for i in train_samples}
X_val = {i: X[i] for i in val_samples}
y_train = {i: y[i] for i in train_samples}
y_val = {i: y[i] for i in val_samples}
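# %% [markdown]
# <p>
# For the 81 samples used here, that works out to 16 validation samples and 65
# training samples (a quick arithmetic check, added for illustration):
# </p>

# %%
if __name__ == "__main__":
    assert len(val_samples) == 16 and len(train_samples) == 65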
# %% [markdown]
# <h3>Dataset Collation</h3>
# <p>This function collates our data for a torch DataLoader. The use of a DataLoader will allow us to shuffle and prefetch data, reducing overfitting and maximising performance, as IO will be a bottleneck. A dynamically constructed version of this function would let us select the outputs we train against; however, for this experiment we will match against all outputs for simplicity.</p>

# %%
# Collate a single-sample batch into device tensors (plus a dask array)
def collate_fn(batch):
    X0 = batch[0][0][0].to_numpy(dtype=np.float32)[0]
    X1 = batch[0][0][1].to_dask_array(lengths=True)
    y = batch[0][1].to_numpy(dtype=np.float32)
    return (
        torch.from_numpy(X0).to(device),
        X1,
        torch.from_numpy(y).to(device),
    )
# %% [markdown]
# <h3>Convolutional Data Compression</h3>
# <p>
# The <code>DaskCompression</code> module accepts a dask array and applies a convolutional kernel to it to significantly compress the input data. This allows us to transform a larger-than-VRAM dataset into one that can fit on our GPU, and (hopefully) retain the information relevant to training the rest of our model.
# </p><p>
# Note how each kernel is only computed inside <code>compress_kernel</code> and is immediately compressed via convolution. This ensures that only one kernel needs to be stored in memory at a time, avoiding the need to hold the entire dataset in memory at once.
# </p>
# %%
class DaskCompression(nn.Module):
    def __init__(
        self, in_channels, out_channels, kernel_size, chunk_size=1, device=device
    ):
        super().__init__()
        self.kernel_size = kernel_size
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.chunk_size = chunk_size
        self.device = device
        self.conv = nn.Conv1d(in_channels, out_channels, kernel_size).to(device)

    def compress_kernel(self, kernel):
        return (
            self.conv(torch.from_numpy(kernel.compute()).to(self.device))
            .squeeze()
            .to("cpu")  # return to cpu to save VRAM
        )

    def forward(self, x):
        # Precompute the dimensions of the output array
        dim0, dim2 = x.shape
        assert dim2 == self.in_channels
        dim0 = (dim0 // self.kernel_size) // self.chunk_size
        x = x.reshape(dim0, self.chunk_size, self.kernel_size, dim2)
        x = da.transpose(x, axes=(0, 1, 3, 2))

        x = [self.compress_kernel(kernel) for kernel in x]
        return torch.stack(x).to(self.device)
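# %% [markdown]
# <p>
# A minimal shape walkthrough with toy sizes (illustrative only): 24 rows of 5
# channels with a kernel size of 4 and a chunk size of 2 give
# <code>(24 // 4) // 2 = 3</code> chunks, so the output stacks to
# <code>(3, 2, 5)</code>.
# </p>

# %%
if __name__ == "__main__":
    _demo = DaskCompression(5, 5, kernel_size=4, chunk_size=2, device=torch.device("cpu"))
    _demo_x = da.random.random((24, 5)).astype(np.float32)
    assert tuple(_demo(_demo_x).shape) == (3, 2, 5)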
# %% [markdown]
# <h3>Model Design</h3>
# <p>
# The model was designed as a dynamically constructed, hyperparameter-driven model for ease of hyperparameter optimisation. The constructed model processes data in the following way:
# </p>
# <ol>
# <li>The input is left/right padded to a multiple of the compressor kernel size</li>
# <li>The dask array is compressed by a <code>DaskCompression</code> layer, treating each input as a channel</li>
# <li>The compressed array is then recursively convolved down to a size less than or equal to the width of our feedforward network</li>
# <li>The channels of the now convolved data are combined</li>
# <li>The combined, flattened data is then left/right padded to the width of the feedforward network</li>
# <li>Finally, the data is fed into a feedforward network</li>
# </ol>
# <p>
# This relatively simple design allows the network to accept both larger-than-RAM datasets and datasets of variable sizes as inputs. This makes it suitable for training on whole Aconity datasets, without the need for culling or binning.
# </p>
# %%
class Model(pl.LightningModule):
    def __init__(
        self,
        # pl attributes
        optimizer=torch.optim.Adam,
        optimizer_args=(),
        optimizer_kwargs={},
        scheduler=None,
        scheduler_kwargs={},
        loss=torch.nn.MSELoss(),
        train_ds=None,
        val_ds=None,
        # model args & kwargs
        compressor_kernel_size=128,
        compressor_chunk_size=128,
        compressor_act=(nn.ReLU, (), {}),
        conv_kernel_size=128,
        conv_norm=False,
        conv_act=(nn.ReLU, (), {}),
        channel_combine_act=(nn.ReLU, (), {}),
        param_ff_depth=4,
        param_ff_width=16,
        param_ff_act=(nn.ReLU, (), {}),
        ff_width=512,
        ff_depth=4,
        ff_act=(nn.ReLU, (), {}),
        out_size=6,
        out_act=(nn.ReLU, (), {}),
    ):
        super().__init__()
        # Assign necessary attributes for pl model
        self.optimizer = optimizer
        self.optimizer_args = optimizer_args
        self.optimizer_kwargs = optimizer_kwargs
        self.scheduler = scheduler
        self.scheduler_kwargs = scheduler_kwargs
        self.loss = loss
        self.mse = MeanSquaredError()
        self.train_ds = train_ds
        self.val_ds = val_ds
        # Attrs for dynamically created model to be tested
        self.compressor_kernel_size = compressor_kernel_size
        self.compressor_chunk_size = compressor_chunk_size
        self.conv_kernel_size = conv_kernel_size
        self.ff_width = ff_width
        self.ff_depth = ff_depth
        self.out_size = out_size
        # layers
        # compressor compresses and converts dask array to torch tensor
        self.convolutional_compressor = DaskCompression(
            5,
            5,
            kernel_size=compressor_kernel_size,
            chunk_size=compressor_chunk_size,
        )
        self.compressor_act = compressor_act[0](*compressor_act[1], **compressor_act[2])
        # convolutional layer recursively applies convolutions to the compressed input
        self.conv = nn.Conv1d(5, 5, kernel_size=conv_kernel_size)
        self.conv_norm = nn.LocalResponseNorm(5) if conv_norm else nn.Identity()
        self.conv_act = conv_act[0](*conv_act[1], **conv_act[2])
        self.combine_channels = nn.Conv1d(5, 1, kernel_size=1)
        self.channel_combine_act = channel_combine_act[0](
            *channel_combine_act[1], **channel_combine_act[2]
        )
        self.param_ff = nn.Sequential(
            nn.Linear(4, param_ff_width),
            param_ff_act[0](*param_ff_act[1], **param_ff_act[2]),
            *chain(
                *(
                    (
                        nn.Linear(param_ff_width, param_ff_width),
                        param_ff_act[0](*param_ff_act[1], **param_ff_act[2]),
                    )
                    for _ in range(param_ff_depth)
                )
            ),
        )
        self.ff = nn.Sequential(
            nn.Linear(ff_width + param_ff_width, ff_width),
            ff_act[0](*ff_act[1], **ff_act[2]),
            *chain(
                *(
                    (
                        nn.Linear(ff_width, ff_width),
                        ff_act[0](*ff_act[1], **ff_act[2]),
                    )
                    for _ in range(ff_depth)
                )
            ),
        )
        self.out_dense = nn.Linear(ff_width, out_size)
        self.out_act = out_act[0](*out_act[1], **out_act[2])

    @staticmethod
    def pad_ax0_to_multiple_of(x, multiple_of):
        padding = (((x.shape[0] // multiple_of) + 1) * multiple_of) - x.shape[0]
        left_pad = padding // 2
        right_pad = padding - left_pad
        return da.pad(
            x, ((left_pad, right_pad), (0, 0)), mode="constant", constant_values=0
        )

    def pad_to_ff_width(self, x):
        padding = self.ff_width - x.shape[1]
        left_pad = padding // 2
        right_pad = padding - left_pad
        return F.pad(
            x,
            (right_pad, left_pad, 0, 0),
            mode="constant",
            value=0.0,
        )

    def forward(self, x0, x1):
        # pad to a multiple of kernel_size * chunk_size
        x1 = self.pad_ax0_to_multiple_of(
            x1, self.compressor_kernel_size * self.compressor_chunk_size
        )
        x1 = self.convolutional_compressor(x1)
        x1 = x1.reshape(x1.shape[0] * x1.shape[1], x1.shape[2]).T.unsqueeze(0)
        while x1.shape[2] > self.ff_width:
            x1 = self.conv(x1)
            x1 = self.conv_norm(x1)
            x1 = self.conv_act(x1)
        x1 = self.combine_channels(x1)
        x1 = self.channel_combine_act(x1)
        x1 = x1.squeeze(1)
        x1 = self.pad_to_ff_width(x1)
        x0 = x0.unsqueeze(0)
        x0 = self.param_ff(x0)
        x = torch.cat((x1, x0), dim=1)
        x = self.ff(x)
        x = self.out_dense(x)
        x = self.out_act(x)
        return x

    def configure_optimizers(self):
        optimizer = self.optimizer(
            self.parameters(), *self.optimizer_args, **self.optimizer_kwargs
        )
        if self.scheduler is not None:
            scheduler = self.scheduler(optimizer, **self.scheduler_kwargs)
            return [optimizer], [scheduler]
        else:
            return optimizer

    def train_dataloader(self):
        return self.train_ds

    def val_dataloader(self):
        return self.val_ds

    def training_step(self, batch, batch_idx):
        x0, x1, y = batch
        y_hat = self(x0, x1)
        loss = self.loss(y_hat, y)
        self.log("train_loss", loss)
        mse = self.mse(y_hat, y)
        self.log("train_MSE", mse, on_step=True, on_epoch=True, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        x0, x1, y = batch
        y_hat = self(x0, x1)
        loss = self.loss(y_hat, y)
        self.log("val_loss", loss)
        mse = self.mse(y_hat, y)
        self.log("val_MSE", mse, on_step=True, on_epoch=True, prog_bar=True)
# %% [markdown]
# <h3>Activation Functions</h3>
# <p>
# For our hyperparameter optimisation, we intend to test all the activation functions in PyTorch. In addition to the builtin activations, we will also train using the following custom implemented activation functions from the literature or of our own design (a quick smoke test of the dispatcher follows the next cell):
# </p>
# <ol>
# <li><b><code>BMU</code>:</b> Bio-Mimicking Unit; an activation function designed to mimic the activation potential of a biological neuron.</li>
# <li><b><code>SoftExp</code>:</b> Soft Exponential function; a parametric activation function that fits a wide variety of exponential curves (DOI: <a href=https://arxiv.org/abs/1602.01321v1>10.48550/arXiv.1602.01321</a>)</li>
# <li><b><code>LeakyPReQU</code>:</b> Leaky Parametric Rectified Quadratic Unit; a continuously differentiable function that is a parametrically sloped line for <code>x ≤ 0</code> and a quadratic curve for <code>x > 0</code></li>
# <li><b><code>ISRU</code>:</b> Inverse Square Root Unit; a somewhat uncommon function that can be useful in models such as this, as it yields a continuously differentiable curve while being extremely fast to compute using bit manipulation</li>
# <li><b><code>ISRLU</code>:</b> Inverse Square Root Linear Unit; a modified ISRU that is an ISRU for <code>x < 0</code> and <code>f(x) = x</code> for <code>x ≥ 0</code> (DOI: <a href=https://arxiv.org/abs/1710.09967>10.48550/arXiv.1710.09967</a>)</li>
# <li><b><code>PBessel</code>:</b> Parametric Bessel; a parametric Bessel curve yielding various different wave formations depending on its trainable parameters</li>
# <li><b><code>Sinusoid</code>:</b> A parametric sine wave, with frequency and phase as trainable parameters</li>
# <li><b><code>Modulo</code>:</b> A parametric sawtooth wave, <code>f(x) = (αx) % β</code>, where α and β are trainable parameters</li>
# <li><b><code>TriWave</code>:</b> A parametric triangle wave, with wavelength as a trainable parameter</li>
# <li><b><code>Gaussian</code>:</b> A parametric Gaussian curve, with trainable centre and width</li>
# </ol>
# %%
# Create a dispatcher mapping activation names to classes, mixing builtin
# activations with custom activations from experimentation or literature
from custom_activations import SoftExp, PBessel


activation_dispatcher = {
    "Tanh": nn.Tanh,
    "SiLU": nn.SiLU,
    "Softplus": nn.Softplus,
    "SoftExp": SoftExp,
    "PBessel": PBessel,
}
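# %% [markdown]
# <p>
# The smoke test mentioned above (an illustrative addition, not part of the
# search itself): every dispatcher entry should construct with no arguments and
# map a tensor elementwise without changing its shape.
# </p>

# %%
if __name__ == "__main__":
    _demo_x = torch.randn(16)
    for _name, _act in activation_dispatcher.items():
        assert _act()(_demo_x).shape == _demo_x.shape, _name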
# %% [markdown]
# <h3>Hyperparameter training</h3>
# <p>Here, we define an objective function describing what we want Optuna to do during each trial and how to react to the various errors and/or situations that may arise. To summarise the objective:</p>
# <ul>
# <li>Optuna selects hyperparameters for all input parameters within the given constraints</li>
# <li>A model is generated using the selected hyperparameters</li>
# <li>PyTorchLightning trains the model for 2 epochs</li>
# <li>The model is evaluated on the validation set</li>
# <li>The validation loss is returned to Optuna</li>
# </ul>
# <p>
# Optuna monitors the reported validation loss and attempts to minimise it. An extremely aggressive pruning strategy known as "hyperband pruning" is used to efficiently narrow the parameter space down to something more reasonable. Any trial Optuna deems suboptimal is pruned and stopped early to save time.
# </p>
# %%
# Test parameters
n_epochs = 2
output_keys = list(next(iter(y_train.values())).keys())
activation_vals = list(activation_dispatcher.keys())


# Next we define the objective function for the hyperparameter optimization
def objective(trial):
    torch.cuda.empty_cache()
    objective_value = torch.inf
    model = None
    logger = None
    try:
        # Select hyperparameters for testing
        compressor_kernel_size = 128
        compressor_chunk_size = 128
        compressor_act = (
            activation_dispatcher[
                trial.suggest_categorical("compressor_act", activation_vals)
            ],
            (),
            {},
        )
        conv_kernel_size = 128
        conv_norm = trial.suggest_categorical("conv_norm", [True, False])
        conv_act = (
            activation_dispatcher[
                trial.suggest_categorical("conv_act", activation_vals)
            ],
            (),
            {},
        )
        channel_combine_act = (
            activation_dispatcher[
                trial.suggest_categorical("channel_combine_act", activation_vals)
            ],
            (),
            {},
        )
        param_ff_depth = trial.suggest_int("param_ff_depth", 2, 8, 2)
        param_ff_width = trial.suggest_int("param_ff_width", 16, 64, 16)
        param_ff_act = (
            activation_dispatcher[
                trial.suggest_categorical("param_ff_act", activation_vals)
            ],
            (),
            {},
        )
        ff_width = trial.suggest_int("ff_width", 256, 1025, 256)
        ff_depth = trial.suggest_int("ff_depth", 2, 8, 2)
        ff_act = (
            activation_dispatcher[trial.suggest_categorical("ff_act", activation_vals)],
            (),
            {},
        )
        out_size = 2
        out_act = (nn.Sigmoid, tuple(), dict())

        # Set up the model architecture and other necessary components
        model = Model(
            compressor_kernel_size=compressor_kernel_size,
            compressor_chunk_size=compressor_chunk_size,
            compressor_act=compressor_act,
            conv_kernel_size=conv_kernel_size,
            conv_act=conv_act,
            conv_norm=conv_norm,
            channel_combine_act=channel_combine_act,
            param_ff_depth=param_ff_depth,
            param_ff_width=param_ff_width,
            param_ff_act=param_ff_act,
            ff_width=ff_width,
            ff_depth=ff_depth,
            ff_act=ff_act,
            out_size=out_size,
            out_act=out_act,
        ).to(device)

        trainer = Trainer(
            accelerator="gpu",
            max_epochs=n_epochs,
            devices=1,
            logger=logger,
            num_sanity_val_steps=0,  # Needs to be disabled or else we get an error because X is a dask array
            # precision="16-mixed",
            callbacks=[
                PyTorchLightningPruningCallback(trial, monitor="val_loss"),
            ],
        )
        # Prepare datasets
        train = DataLoader(
            list(zip(X_train.values(), y_train.values())),
            collate_fn=collate_fn,
            shuffle=True,
        )
        valid = DataLoader(
            list(zip(X_val.values(), y_val.values())),
            shuffle=True,
            collate_fn=collate_fn,
        )
        # Finally, train the model
        trainer.fit(model, train, valid)
        # Read the final validation loss back out of the trainer so it can be
        # reported to Optuna
        objective_value = trainer.callback_metrics["val_loss"].item()
    except Exception as e:
        logging.exception(f"An exception occurred in trial {trial.number}: {e}")
        raise optuna.exceptions.TrialPruned()
    finally:
        if logger is not None:
            logger.experiment.unwatch(model)
            logger.experiment.finish()
        del model
        torch.cuda.empty_cache()
        if objective_value == torch.inf:
            raise optuna.exceptions.TrialPruned()
    return objective_value
# %% [markdown]
# <h3>Hyperparameter Optimisation on a Computing Cluster</h3>
# <p>
# The final important step is to run the optimisation on a cluster of computers to maximise the number of trials that can run in parallel. Although this could be achieved with a more complex, scheduler-controlled system and dask, we will use the far simpler approach of a shared SQL ledger to keep track of the trials and their results. This is a very simple approach, but it is sufficient for our purposes and easy to implement (a sketch of the worker-side entry point follows the cell below). Using this approach, the model was trained on a cluster of 5 computers at once.
# </p>
# %%
if __name__ == "__main__":
    # storage_name = "sqlite:///optuna.sql"
    storage_name = "mysql+pymysql://root:Ch31121992@192.168.1.10:3306/optuna_db"
    study_name = "Composition Experiment 1"
    study = optuna.create_study(
        study_name=study_name,
        storage=storage_name,
        direction="minimize",
        pruner=HyperbandPruner(),
        load_if_exists=True,
    )
    study.optimize(
        objective,
        n_trials=None,
        timeout=None,
    )
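# %% [markdown]
# <p>
# The worker-side sketch mentioned above (a hypothetical helper, not part of
# the original script): each additional machine in the cluster attaches to the
# shared study by name and runs the same objective against the shared storage.
# </p>

# %%
def run_worker(storage, name):
    worker_study = optuna.load_study(study_name=name, storage=storage)
    worker_study.optimize(objective)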
133
expt1_analysis.ipynb
Normal file
@@ -0,0 +1,133 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Data handling imports\n",
    "import numpy as np\n",
    "import pickle\n",
    "import random\n",
    "from tqdm.auto import tqdm\n",
    "import optuna"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "storage_name = \"mysql+pymysql://root:Ch31121992@192.168.1.10:3306/optuna_db\"\n",
    "study_name = \"Experiment 1\"\n",
    "study = optuna.load_study(\n",
    "    study_name=study_name,\n",
    "    storage=storage_name,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = study.trials_dataframe()\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.dropna(inplace=True)\n",
    "df.sort_values(by=\"value\", inplace=True)\n",
    "df.drop(df[\"value\"].idxmax(), inplace=True)\n",
    "df.drop(df[\"value\"].idxmax(), inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "pd.options.plotting.backend = \"plotly\"\n",
    "params = list(df.keys()[5:-1])\n",
    "for p in params:\n",
    "    df.plot(x=p, y=\"value\", kind=\"scatter\", title=p)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "params = list(df.keys()[5:-1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!poetry add tabulate\n",
    "from tabulate import tabulate\n",
    "print(\n",
    "    tabulate(\n",
    "        (x[0] for x in sorted(list(df.groupby(params)), key=lambda x: x[1][\"value\"].mean())),\n",
    "        headers=params,\n",
    "        tablefmt=\"grid\",\n",
    "    )\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for p in params:\n",
    "    df.plot(x=p, y=\"value\", kind=\"scatter\", title=p).show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.3"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
366
expt2.ipynb
Normal file
@@ -0,0 +1,366 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<h1>Experiment 2</h1>\n",
|
||||
"<h3>Targetted hyperparameter tuning</h3>\n",
|
||||
"<p>\n",
|
||||
"By examining the results of expt1, a smaller range of hyperparameters for expt2 was chosen. This allowed for a more targetted search of the hyperparameter space to find an optimal configuration. The selected parameters for expt2 were as follows:\n",
|
||||
"</p>\n",
|
||||
"<ul>\n",
|
||||
"<li>in_act = Linear, Mish, PBessel, or Tanhshrink</li>\n",
|
||||
"<li>compressor_kernel_size = 128</li>\n",
|
||||
"<li>compressor_act = Softshrink, SoftExp, or PReLU</li>\n",
|
||||
"<li>conv_kernel_size = 128</li>\n",
|
||||
"<li>conv_act = Sigmoid or PBessel</li>\n",
|
||||
"<li>channel_combine_act = HardSigmoid or GELU</li>\n",
|
||||
"<li>ff_width = 512</li>\n",
|
||||
"<li>ff_depth = 2, 4, or 6</li>\n",
|
||||
"<li>ff_act = CELU</li>\n",
|
||||
"<li>out_act = Tanhshrink or Mish</li>\n",
|
||||
"</ul>\n",
|
||||
"<p>\n",
|
||||
"Several of the parameters were able to be fixed to a specific value, and the remaining parameters (with the exception of <code>`in_act`</code>) were reduced to only 2 or 3 possible values, dramatically shrinking the parameter space. For this reason, a significantly less aggressive pruning algorithm was used, allowing for a more thorough search of the parameter space.\n",
|
||||
"</p>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Data handling imports\n",
|
||||
"from dask.distributed import Client, LocalCluster\n",
|
||||
"import dask\n",
|
||||
"import dask.dataframe as dd\n",
|
||||
"import dask.array as da\n",
|
||||
"import numpy as np\n",
|
||||
"import pickle\n",
|
||||
"import random\n",
|
||||
"from itertools import chain\n",
|
||||
"from tqdm.auto import tqdm\n",
|
||||
"\n",
|
||||
"# Deep learning imports\n",
|
||||
"import torch\n",
|
||||
"from torch.utils.data import DataLoader\n",
|
||||
"from torch import nn\n",
|
||||
"from torch.nn import functional as F\n",
|
||||
"from torch import optim\n",
|
||||
"import pytorch_lightning as pl\n",
|
||||
"from pytorch_lightning import Trainer\n",
|
||||
"import optuna\n",
|
||||
"from optuna.pruners import HyperbandPruner\n",
|
||||
"from optuna.integration import PyTorchLightningPruningCallback\n",
|
||||
"\n",
|
||||
"# Suppress some warning messages from pytorch_lightning,\n",
|
||||
"# It really doesn't like that i've forced it to handle a dask array!\n",
|
||||
"import warnings\n",
|
||||
"\n",
|
||||
"warnings.filterwarnings(\"ignore\", category=UserWarning, module=pl.__name__)\n",
|
||||
"\n",
|
||||
"# Also, set up a log to record debug messages for failed trials\n",
|
||||
"import logging\n",
|
||||
"\n",
|
||||
"logging.basicConfig(filename=\"debug_test.log\", encoding=\"utf-8\", level=logging.DEBUG)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from expt1 import (\n",
|
||||
" Model,\n",
|
||||
" Linear,\n",
|
||||
" device,\n",
|
||||
" activation_dispatcher,\n",
|
||||
" X_train,\n",
|
||||
" y_train,\n",
|
||||
" X_val,\n",
|
||||
" y_val,\n",
|
||||
" create_collate_fn,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"cluster = LocalCluster(n_workers=8, threads_per_worker=1)\n",
|
||||
"client = Client(cluster)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Monkey patch to allow pytorch lightning to accept a dask array as a model input\n",
|
||||
"from typing import Any, Generator, Iterable, Mapping, Optional, Union\n",
|
||||
"\n",
|
||||
"BType = Union[da.Array, torch.Tensor, str, Mapping[Any, \"BType\"], Iterable[\"BType\"]]\n",
|
||||
"\n",
|
||||
"unpatched = pl.utilities.data._extract_batch_size\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def patch(batch: BType) -> Generator[Optional[int], None, None]:\n",
|
||||
" if isinstance(batch, da.core.Array):\n",
|
||||
" if len(batch.shape) == 0:\n",
|
||||
" yield 1\n",
|
||||
" else:\n",
|
||||
" yield batch.shape[0]\n",
|
||||
" else:\n",
|
||||
" yield from unpatched(batch)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"pl.utilities.data._extract_batch_size = patch"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Test parameters\n",
|
||||
"n_epochs = 10\n",
|
||||
"output_keys = list(next(iter(y_train.values())).keys())\n",
|
||||
"activation_vals = list(activation_dispatcher.keys())\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Next we define the objective function for the hyperparameter optimization\n",
|
||||
"def objective(trial):\n",
|
||||
" torch.cuda.empty_cache()\n",
|
||||
" objective_value = torch.inf\n",
|
||||
" model = None\n",
|
||||
" logger = None\n",
|
||||
" try:\n",
|
||||
" # Select hyperparameters for testing\n",
|
||||
" in_act = (\n",
|
||||
" activation_dispatcher[trial.suggest_categorical(\"in_act\", activation_vals)],\n",
|
||||
" (),\n",
|
||||
" {},\n",
|
||||
" )\n",
|
||||
" compressor_kernel_size = trial.suggest_int(\n",
|
||||
" \"compressor_kernel_size\", 64, 257, 64\n",
|
||||
" )\n",
|
||||
" compressor_chunk_size = 128\n",
|
||||
" compressor_act = (\n",
|
||||
" activation_dispatcher[\n",
|
||||
" trial.suggest_categorical(\"compressor_act\", activation_vals)\n",
|
||||
" ],\n",
|
||||
" (),\n",
|
||||
" {},\n",
|
||||
" )\n",
|
||||
" conv_kernel_size = trial.suggest_int(\"conv_kernel_size\", 64, 257, 64)\n",
|
||||
" conv_act = (\n",
|
||||
" activation_dispatcher[\n",
|
||||
" trial.suggest_categorical(\"conv_act\", activation_vals)\n",
|
||||
" ],\n",
|
||||
" (),\n",
|
||||
" {},\n",
|
||||
" )\n",
|
||||
" channel_combine_act = (\n",
|
||||
" activation_dispatcher[\n",
|
||||
" trial.suggest_categorical(\"channel_combine_act\", activation_vals)\n",
|
||||
" ],\n",
|
||||
" (),\n",
|
||||
" {},\n",
|
||||
" )\n",
|
||||
" ff_width = trial.suggest_int(\"ff_width\", 256, 1025, 256)\n",
|
||||
" ff_depth = trial.suggest_int(\"ff_depth\", 2, 8, 2)\n",
|
||||
" ff_act = (\n",
|
||||
" activation_dispatcher[trial.suggest_categorical(\"ff_act\", activation_vals)],\n",
|
||||
" (),\n",
|
||||
" {},\n",
|
||||
" )\n",
|
||||
" out_size = len(output_keys)\n",
|
||||
" out_act = (\n",
|
||||
" activation_dispatcher[\n",
|
||||
" trial.suggest_categorical(\"out_act\", activation_vals)\n",
|
||||
" ],\n",
|
||||
" (),\n",
|
||||
" {},\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # Set up the model architecture and other necessary components\n",
|
||||
" model = Model(\n",
|
||||
" in_act=in_act,\n",
|
||||
" compressor_kernel_size=compressor_kernel_size,\n",
|
||||
" compressor_chunk_size=compressor_chunk_size,\n",
|
||||
" compressor_act=compressor_act,\n",
|
||||
" conv_kernel_size=conv_kernel_size,\n",
|
||||
" conv_act=conv_act,\n",
|
||||
" channel_combine_act=channel_combine_act,\n",
|
||||
" ff_width=ff_width,\n",
|
||||
" ff_depth=ff_depth,\n",
|
||||
" ff_act=ff_act,\n",
|
||||
" out_size=out_size,\n",
|
||||
" out_act=out_act,\n",
|
||||
" ).to(device)\n",
|
||||
"\n",
|
||||
" trainer = Trainer(\n",
|
||||
" accelerator=\"gpu\",\n",
|
||||
" max_epochs=n_epochs,\n",
|
||||
" devices=1,\n",
|
||||
" logger=logger,\n",
|
||||
" num_sanity_val_steps=0, # Needs to be disabled or else we get an error because X is dask array\n",
|
||||
" # precision=\"16-mixed\",\n",
|
||||
" callbacks=[\n",
|
||||
" PyTorchLightningPruningCallback(trial, monitor=\"val_loss\"),\n",
|
||||
" ],\n",
|
||||
" )\n",
|
||||
" # Prepare datasets\n",
|
||||
" train = DataLoader(\n",
|
||||
" list(zip(X_train.values(), y_train.values())),\n",
|
||||
" collate_fn=create_collate_fn(),\n",
|
||||
" shuffle=True,\n",
|
||||
" )\n",
|
||||
" valid = DataLoader(\n",
|
||||
" list(zip(X_val.values(), y_val.values())),\n",
|
||||
" shuffle=True,\n",
|
||||
" collate_fn=create_collate_fn(),\n",
|
||||
" )\n",
|
||||
" # Finally, train the model\n",
|
||||
" trainer.fit(model, train, valid)\n",
|
||||
" except torch.cuda.OutOfMemoryError as e:\n",
|
||||
" logging.warning(f\"Ran out of memory in trial {trial.number}!\")\n",
|
||||
" raise optuna.exceptions.TrialPruned()\n",
|
||||
" except Exception as e:\n",
|
||||
" logging.exception(f\"An exception occurred in trial {trial.number}: {e}\")\n",
|
||||
" raise optuna.exceptions.TrialPruned()\n",
|
||||
" finally:\n",
|
||||
" if logger is not None:\n",
|
||||
" logger.experiment.unwatch(model)\n",
|
||||
" logger.experiment.finish()\n",
|
||||
" del model\n",
|
||||
" torch.cuda.empty_cache()\n",
|
||||
" if objective_value == torch.inf:\n",
|
||||
" raise optuna.exceptions.TrialPruned()\n",
|
||||
" return objective_value"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[I 2023-07-31 23:49:15,744] Using an existing study with name 'Experiment 2' instead of creating a new one.\n",
|
||||
"[I 2023-07-31 23:49:16,553] Trial 221 pruned. \n",
|
||||
"[I 2023-07-31 23:49:16,928] Trial 222 pruned. \n",
|
||||
"[I 2023-07-31 23:49:17,318] Trial 223 pruned. \n",
|
||||
"[I 2023-07-31 23:49:17,682] Trial 224 pruned. \n",
|
||||
"[W 2023-07-31 23:49:18,028] Trial 225 failed with parameters: {} because of the following error: KeyboardInterrupt().\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/home/cianh/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/optuna/study/_optimize.py\", line 200, in _run_trial\n",
|
||||
" value_or_values = func(trial)\n",
|
||||
" ^^^^^^^^^^^\n",
|
||||
" File \"/tmp/ipykernel_562333/3392796582.py\", line 16, in objective\n",
|
||||
" activation_dispatcher[trial.suggest_categorical(\"in_act\", activation_vals)],\n",
|
||||
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
|
||||
" File \"/home/cianh/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/optuna/trial/_trial.py\", line 405, in suggest_categorical\n",
|
||||
" return self._suggest(name, CategoricalDistribution(choices=choices))\n",
|
||||
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
|
||||
" File \"/home/cianh/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/optuna/trial/_trial.py\", line 630, in _suggest\n",
|
||||
" param_value = self.study.sampler.sample_independent(\n",
|
||||
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
|
||||
" File \"/home/cianh/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/optuna/samplers/_tpe/sampler.py\", line 471, in sample_independent\n",
|
||||
" mpe_above = _ParzenEstimator(\n",
|
||||
" ^^^^^^^^^^^^^^^^^\n",
|
||||
" File \"/home/cianh/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/optuna/samplers/_tpe/parzen_estimator.py\", line 75, in __init__\n",
|
||||
" distributions=[\n",
|
||||
" ^\n",
|
||||
" File \"/home/cianh/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/optuna/samplers/_tpe/parzen_estimator.py\", line 76, in <listcomp>\n",
|
||||
" self._calculate_distributions(\n",
|
||||
" File \"/home/cianh/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/optuna/samplers/_tpe/parzen_estimator.py\", line 154, in _calculate_distributions\n",
|
||||
" return self._calculate_categorical_distributions(\n",
|
||||
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
|
||||
" File \"/home/cianh/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/optuna/samplers/_tpe/parzen_estimator.py\", line 192, in _calculate_categorical_distributions\n",
|
||||
" weights /= weights.sum(axis=1, keepdims=True)\n",
|
||||
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
|
||||
" File \"/home/cianh/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/numpy/core/_methods.py\", line 47, in _sum\n",
|
||||
" def _sum(a, axis=None, dtype=None, out=None, keepdims=False,\n",
|
||||
" \n",
|
||||
"KeyboardInterrupt\n",
|
||||
"[W 2023-07-31 23:49:18,040] Trial 225 failed with value None.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"ename": "KeyboardInterrupt",
|
||||
"evalue": "",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
|
||||
"Cell \u001b[0;32mIn[5], line 11\u001b[0m\n\u001b[1;32m 3\u001b[0m study_name \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mExperiment 2\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 4\u001b[0m study \u001b[39m=\u001b[39m optuna\u001b[39m.\u001b[39mcreate_study(\n\u001b[1;32m 5\u001b[0m study_name\u001b[39m=\u001b[39mstudy_name,\n\u001b[1;32m 6\u001b[0m storage\u001b[39m=\u001b[39mstorage_name,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 9\u001b[0m load_if_exists\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m,\n\u001b[1;32m 10\u001b[0m )\n\u001b[0;32m---> 11\u001b[0m study\u001b[39m.\u001b[39;49moptimize(\n\u001b[1;32m 12\u001b[0m objective,\n\u001b[1;32m 13\u001b[0m n_trials\u001b[39m=\u001b[39;49m\u001b[39mNone\u001b[39;49;00m,\n\u001b[1;32m 14\u001b[0m timeout\u001b[39m=\u001b[39;49m\u001b[39mNone\u001b[39;49;00m,\n\u001b[1;32m 15\u001b[0m )\n",
|
||||
"File \u001b[0;32m~/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/optuna/study/study.py:443\u001b[0m, in \u001b[0;36mStudy.optimize\u001b[0;34m(self, func, n_trials, timeout, n_jobs, catch, callbacks, gc_after_trial, show_progress_bar)\u001b[0m\n\u001b[1;32m 339\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39moptimize\u001b[39m(\n\u001b[1;32m 340\u001b[0m \u001b[39mself\u001b[39m,\n\u001b[1;32m 341\u001b[0m func: ObjectiveFuncType,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 348\u001b[0m show_progress_bar: \u001b[39mbool\u001b[39m \u001b[39m=\u001b[39m \u001b[39mFalse\u001b[39;00m,\n\u001b[1;32m 349\u001b[0m ) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 350\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"Optimize an objective function.\u001b[39;00m\n\u001b[1;32m 351\u001b[0m \n\u001b[1;32m 352\u001b[0m \u001b[39m Optimization is done by choosing a suitable set of hyperparameter values from a given\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 440\u001b[0m \u001b[39m If nested invocation of this method occurs.\u001b[39;00m\n\u001b[1;32m 441\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 443\u001b[0m _optimize(\n\u001b[1;32m 444\u001b[0m study\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m,\n\u001b[1;32m 445\u001b[0m func\u001b[39m=\u001b[39;49mfunc,\n\u001b[1;32m 446\u001b[0m n_trials\u001b[39m=\u001b[39;49mn_trials,\n\u001b[1;32m 447\u001b[0m timeout\u001b[39m=\u001b[39;49mtimeout,\n\u001b[1;32m 448\u001b[0m n_jobs\u001b[39m=\u001b[39;49mn_jobs,\n\u001b[1;32m 449\u001b[0m catch\u001b[39m=\u001b[39;49m\u001b[39mtuple\u001b[39;49m(catch) \u001b[39mif\u001b[39;49;00m \u001b[39misinstance\u001b[39;49m(catch, Iterable) \u001b[39melse\u001b[39;49;00m (catch,),\n\u001b[1;32m 450\u001b[0m callbacks\u001b[39m=\u001b[39;49mcallbacks,\n\u001b[1;32m 451\u001b[0m gc_after_trial\u001b[39m=\u001b[39;49mgc_after_trial,\n\u001b[1;32m 452\u001b[0m show_progress_bar\u001b[39m=\u001b[39;49mshow_progress_bar,\n\u001b[1;32m 453\u001b[0m )\n",
|
||||
"File \u001b[0;32m~/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/optuna/study/_optimize.py:66\u001b[0m, in \u001b[0;36m_optimize\u001b[0;34m(study, func, n_trials, timeout, n_jobs, catch, callbacks, gc_after_trial, show_progress_bar)\u001b[0m\n\u001b[1;32m 64\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 65\u001b[0m \u001b[39mif\u001b[39;00m n_jobs \u001b[39m==\u001b[39m \u001b[39m1\u001b[39m:\n\u001b[0;32m---> 66\u001b[0m _optimize_sequential(\n\u001b[1;32m 67\u001b[0m study,\n\u001b[1;32m 68\u001b[0m func,\n\u001b[1;32m 69\u001b[0m n_trials,\n\u001b[1;32m 70\u001b[0m timeout,\n\u001b[1;32m 71\u001b[0m catch,\n\u001b[1;32m 72\u001b[0m callbacks,\n\u001b[1;32m 73\u001b[0m gc_after_trial,\n\u001b[1;32m 74\u001b[0m reseed_sampler_rng\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m,\n\u001b[1;32m 75\u001b[0m time_start\u001b[39m=\u001b[39;49m\u001b[39mNone\u001b[39;49;00m,\n\u001b[1;32m 76\u001b[0m progress_bar\u001b[39m=\u001b[39;49mprogress_bar,\n\u001b[1;32m 77\u001b[0m )\n\u001b[1;32m 78\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 79\u001b[0m \u001b[39mif\u001b[39;00m n_jobs \u001b[39m==\u001b[39m \u001b[39m-\u001b[39m\u001b[39m1\u001b[39m:\n",
|
||||
"File \u001b[0;32m~/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/optuna/study/_optimize.py:163\u001b[0m, in \u001b[0;36m_optimize_sequential\u001b[0;34m(study, func, n_trials, timeout, catch, callbacks, gc_after_trial, reseed_sampler_rng, time_start, progress_bar)\u001b[0m\n\u001b[1;32m 160\u001b[0m \u001b[39mbreak\u001b[39;00m\n\u001b[1;32m 162\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 163\u001b[0m frozen_trial \u001b[39m=\u001b[39m _run_trial(study, func, catch)\n\u001b[1;32m 164\u001b[0m \u001b[39mfinally\u001b[39;00m:\n\u001b[1;32m 165\u001b[0m \u001b[39m# The following line mitigates memory problems that can be occurred in some\u001b[39;00m\n\u001b[1;32m 166\u001b[0m \u001b[39m# environments (e.g., services that use computing containers such as GitHub Actions).\u001b[39;00m\n\u001b[1;32m 167\u001b[0m \u001b[39m# Please refer to the following PR for further details:\u001b[39;00m\n\u001b[1;32m 168\u001b[0m \u001b[39m# https://github.com/optuna/optuna/pull/325.\u001b[39;00m\n\u001b[1;32m 169\u001b[0m \u001b[39mif\u001b[39;00m gc_after_trial:\n",
|
||||
"File \u001b[0;32m~/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/optuna/study/_optimize.py:251\u001b[0m, in \u001b[0;36m_run_trial\u001b[0;34m(study, func, catch)\u001b[0m\n\u001b[1;32m 244\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39mFalse\u001b[39;00m, \u001b[39m\"\u001b[39m\u001b[39mShould not reach.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 246\u001b[0m \u001b[39mif\u001b[39;00m (\n\u001b[1;32m 247\u001b[0m frozen_trial\u001b[39m.\u001b[39mstate \u001b[39m==\u001b[39m TrialState\u001b[39m.\u001b[39mFAIL\n\u001b[1;32m 248\u001b[0m \u001b[39mand\u001b[39;00m func_err \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m 249\u001b[0m \u001b[39mand\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39misinstance\u001b[39m(func_err, catch)\n\u001b[1;32m 250\u001b[0m ):\n\u001b[0;32m--> 251\u001b[0m \u001b[39mraise\u001b[39;00m func_err\n\u001b[1;32m 252\u001b[0m \u001b[39mreturn\u001b[39;00m frozen_trial\n",
|
||||
"File \u001b[0;32m~/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/optuna/study/_optimize.py:200\u001b[0m, in \u001b[0;36m_run_trial\u001b[0;34m(study, func, catch)\u001b[0m\n\u001b[1;32m 198\u001b[0m \u001b[39mwith\u001b[39;00m get_heartbeat_thread(trial\u001b[39m.\u001b[39m_trial_id, study\u001b[39m.\u001b[39m_storage):\n\u001b[1;32m 199\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 200\u001b[0m value_or_values \u001b[39m=\u001b[39m func(trial)\n\u001b[1;32m 201\u001b[0m \u001b[39mexcept\u001b[39;00m exceptions\u001b[39m.\u001b[39mTrialPruned \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 202\u001b[0m \u001b[39m# TODO(mamu): Handle multi-objective cases.\u001b[39;00m\n\u001b[1;32m 203\u001b[0m state \u001b[39m=\u001b[39m TrialState\u001b[39m.\u001b[39mPRUNED\n",
|
||||
"Cell \u001b[0;32mIn[4], line 16\u001b[0m, in \u001b[0;36mobjective\u001b[0;34m(trial)\u001b[0m\n\u001b[1;32m 12\u001b[0m logger \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m 13\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 14\u001b[0m \u001b[39m# Select hyperparameters for testing\u001b[39;00m\n\u001b[1;32m 15\u001b[0m in_act \u001b[39m=\u001b[39m (\n\u001b[0;32m---> 16\u001b[0m activation_dispatcher[trial\u001b[39m.\u001b[39;49msuggest_categorical(\u001b[39m\"\u001b[39;49m\u001b[39min_act\u001b[39;49m\u001b[39m\"\u001b[39;49m, activation_vals)],\n\u001b[1;32m 17\u001b[0m (),\n\u001b[1;32m 18\u001b[0m {},\n\u001b[1;32m 19\u001b[0m )\n\u001b[1;32m 20\u001b[0m compressor_kernel_size \u001b[39m=\u001b[39m trial\u001b[39m.\u001b[39msuggest_int(\n\u001b[1;32m 21\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mcompressor_kernel_size\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m64\u001b[39m, \u001b[39m257\u001b[39m, \u001b[39m64\u001b[39m\n\u001b[1;32m 22\u001b[0m )\n\u001b[1;32m 23\u001b[0m compressor_chunk_size \u001b[39m=\u001b[39m \u001b[39m128\u001b[39m\n",
"File \u001b[0;32m~/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/optuna/trial/_trial.py:405\u001b[0m, in \u001b[0;36mTrial.suggest_categorical\u001b[0;34m(self, name, choices)\u001b[0m\n\u001b[1;32m 354\u001b[0m \u001b[39m\u001b[39m\u001b[39m\"\"\"Suggest a value for the categorical parameter.\u001b[39;00m\n\u001b[1;32m 355\u001b[0m \n\u001b[1;32m 356\u001b[0m \u001b[39mThe value is sampled from ``choices``.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 400\u001b[0m \u001b[39m :ref:`configurations` tutorial describes more details and flexible usages.\u001b[39;00m\n\u001b[1;32m 401\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m 402\u001b[0m \u001b[39m# There is no need to call self._check_distribution because\u001b[39;00m\n\u001b[1;32m 403\u001b[0m \u001b[39m# CategoricalDistribution does not support dynamic value space.\u001b[39;00m\n\u001b[0;32m--> 405\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_suggest(name, CategoricalDistribution(choices\u001b[39m=\u001b[39;49mchoices))\n",
"File \u001b[0;32m~/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/optuna/trial/_trial.py:630\u001b[0m, in \u001b[0;36mTrial._suggest\u001b[0;34m(self, name, distribution)\u001b[0m\n\u001b[1;32m 628\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 629\u001b[0m study \u001b[39m=\u001b[39m pruners\u001b[39m.\u001b[39m_filter_study(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstudy, trial)\n\u001b[0;32m--> 630\u001b[0m param_value \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mstudy\u001b[39m.\u001b[39;49msampler\u001b[39m.\u001b[39;49msample_independent(\n\u001b[1;32m 631\u001b[0m study, trial, name, distribution\n\u001b[1;32m 632\u001b[0m )\n\u001b[1;32m 634\u001b[0m \u001b[39m# `param_value` is validated here (invalid value like `np.nan` raises ValueError).\u001b[39;00m\n\u001b[1;32m 635\u001b[0m param_value_in_internal_repr \u001b[39m=\u001b[39m distribution\u001b[39m.\u001b[39mto_internal_repr(param_value)\n",
"File \u001b[0;32m~/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/optuna/samplers/_tpe/sampler.py:471\u001b[0m, in \u001b[0;36mTPESampler.sample_independent\u001b[0;34m(self, study, trial, param_name, param_distribution)\u001b[0m\n\u001b[1;32m 467\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 468\u001b[0m mpe_below \u001b[39m=\u001b[39m _ParzenEstimator(\n\u001b[1;32m 469\u001b[0m below, {param_name: param_distribution}, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_parzen_estimator_parameters\n\u001b[1;32m 470\u001b[0m )\n\u001b[0;32m--> 471\u001b[0m mpe_above \u001b[39m=\u001b[39m _ParzenEstimator(\n\u001b[1;32m 472\u001b[0m above, {param_name: param_distribution}, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_parzen_estimator_parameters\n\u001b[1;32m 473\u001b[0m )\n\u001b[1;32m 474\u001b[0m samples_below \u001b[39m=\u001b[39m mpe_below\u001b[39m.\u001b[39msample(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_rng, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_n_ei_candidates)\n\u001b[1;32m 475\u001b[0m log_likelihoods_below \u001b[39m=\u001b[39m mpe_below\u001b[39m.\u001b[39mlog_pdf(samples_below)\n",
"File \u001b[0;32m~/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/optuna/samplers/_tpe/parzen_estimator.py:75\u001b[0m, in \u001b[0;36m_ParzenEstimator.__init__\u001b[0;34m(self, observations, search_space, parameters, predetermined_weights)\u001b[0m\n\u001b[1;32m 71\u001b[0m weights \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39mappend(weights, [parameters\u001b[39m.\u001b[39mprior_weight])\n\u001b[1;32m 72\u001b[0m weights \u001b[39m/\u001b[39m\u001b[39m=\u001b[39m weights\u001b[39m.\u001b[39msum()\n\u001b[1;32m 73\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_mixture_distribution \u001b[39m=\u001b[39m _MixtureOfProductDistribution(\n\u001b[1;32m 74\u001b[0m weights\u001b[39m=\u001b[39mweights,\n\u001b[0;32m---> 75\u001b[0m distributions\u001b[39m=\u001b[39m[\n\u001b[1;32m 76\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_calculate_distributions(\n\u001b[1;32m 77\u001b[0m transformed_observations[:, i], search_space[param], parameters\n\u001b[1;32m 78\u001b[0m )\n\u001b[1;32m 79\u001b[0m \u001b[39mfor\u001b[39;49;00m i, param \u001b[39min\u001b[39;49;00m \u001b[39menumerate\u001b[39;49m(search_space)\n\u001b[1;32m 80\u001b[0m ],\n\u001b[1;32m 81\u001b[0m )\n",
"File \u001b[0;32m~/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/optuna/samplers/_tpe/parzen_estimator.py:76\u001b[0m, in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 71\u001b[0m weights \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39mappend(weights, [parameters\u001b[39m.\u001b[39mprior_weight])\n\u001b[1;32m 72\u001b[0m weights \u001b[39m/\u001b[39m\u001b[39m=\u001b[39m weights\u001b[39m.\u001b[39msum()\n\u001b[1;32m 73\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_mixture_distribution \u001b[39m=\u001b[39m _MixtureOfProductDistribution(\n\u001b[1;32m 74\u001b[0m weights\u001b[39m=\u001b[39mweights,\n\u001b[1;32m 75\u001b[0m distributions\u001b[39m=\u001b[39m[\n\u001b[0;32m---> 76\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_calculate_distributions(\n\u001b[1;32m 77\u001b[0m transformed_observations[:, i], search_space[param], parameters\n\u001b[1;32m 78\u001b[0m )\n\u001b[1;32m 79\u001b[0m \u001b[39mfor\u001b[39;00m i, param \u001b[39min\u001b[39;00m \u001b[39menumerate\u001b[39m(search_space)\n\u001b[1;32m 80\u001b[0m ],\n\u001b[1;32m 81\u001b[0m )\n",
"File \u001b[0;32m~/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/optuna/samplers/_tpe/parzen_estimator.py:154\u001b[0m, in \u001b[0;36m_ParzenEstimator._calculate_distributions\u001b[0;34m(self, transformed_observations, search_space, parameters)\u001b[0m\n\u001b[1;32m 147\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_calculate_distributions\u001b[39m(\n\u001b[1;32m 148\u001b[0m \u001b[39mself\u001b[39m,\n\u001b[1;32m 149\u001b[0m transformed_observations: np\u001b[39m.\u001b[39mndarray,\n\u001b[1;32m 150\u001b[0m search_space: BaseDistribution,\n\u001b[1;32m 151\u001b[0m parameters: _ParzenEstimatorParameters,\n\u001b[1;32m 152\u001b[0m ) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m _BatchedDistributions:\n\u001b[1;32m 153\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(search_space, CategoricalDistribution):\n\u001b[0;32m--> 154\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_calculate_categorical_distributions(\n\u001b[1;32m 155\u001b[0m transformed_observations, search_space\u001b[39m.\u001b[39;49mchoices, parameters\n\u001b[1;32m 156\u001b[0m )\n\u001b[1;32m 157\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 158\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39misinstance\u001b[39m(search_space, (FloatDistribution, IntDistribution))\n",
"File \u001b[0;32m~/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/optuna/samplers/_tpe/parzen_estimator.py:192\u001b[0m, in \u001b[0;36m_ParzenEstimator._calculate_categorical_distributions\u001b[0;34m(self, observations, choices, parameters)\u001b[0m\n\u001b[1;32m 186\u001b[0m weights \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39mfull(\n\u001b[1;32m 187\u001b[0m shape\u001b[39m=\u001b[39m(\u001b[39mlen\u001b[39m(observations) \u001b[39m+\u001b[39m consider_prior, \u001b[39mlen\u001b[39m(choices)),\n\u001b[1;32m 188\u001b[0m fill_value\u001b[39m=\u001b[39mparameters\u001b[39m.\u001b[39mprior_weight \u001b[39m/\u001b[39m (\u001b[39mlen\u001b[39m(observations) \u001b[39m+\u001b[39m consider_prior),\n\u001b[1;32m 189\u001b[0m )\n\u001b[1;32m 191\u001b[0m weights[np\u001b[39m.\u001b[39marange(\u001b[39mlen\u001b[39m(observations)), observations\u001b[39m.\u001b[39mastype(\u001b[39mint\u001b[39m)] \u001b[39m+\u001b[39m\u001b[39m=\u001b[39m \u001b[39m1\u001b[39m\n\u001b[0;32m--> 192\u001b[0m weights \u001b[39m/\u001b[39m\u001b[39m=\u001b[39m weights\u001b[39m.\u001b[39;49msum(axis\u001b[39m=\u001b[39;49m\u001b[39m1\u001b[39;49m, keepdims\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m)\n\u001b[1;32m 193\u001b[0m \u001b[39mreturn\u001b[39;00m _BatchedCategoricalDistributions(weights)\n",
"File \u001b[0;32m~/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/numpy/core/_methods.py:47\u001b[0m, in \u001b[0;36m_sum\u001b[0;34m(a, axis, dtype, out, keepdims, initial, where)\u001b[0m\n\u001b[1;32m 43\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_amin\u001b[39m(a, axis\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, out\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, keepdims\u001b[39m=\u001b[39m\u001b[39mFalse\u001b[39;00m,\n\u001b[1;32m 44\u001b[0m initial\u001b[39m=\u001b[39m_NoValue, where\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m):\n\u001b[1;32m 45\u001b[0m \u001b[39mreturn\u001b[39;00m umr_minimum(a, axis, \u001b[39mNone\u001b[39;00m, out, keepdims, initial, where)\n\u001b[0;32m---> 47\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_sum\u001b[39m(a, axis\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, dtype\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, out\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, keepdims\u001b[39m=\u001b[39m\u001b[39mFalse\u001b[39;00m,\n\u001b[1;32m 48\u001b[0m initial\u001b[39m=\u001b[39m_NoValue, where\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m):\n\u001b[1;32m 49\u001b[0m \u001b[39mreturn\u001b[39;00m umr_sum(a, axis, dtype, out, keepdims, initial, where)\n\u001b[1;32m 51\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_prod\u001b[39m(a, axis\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, dtype\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, out\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, keepdims\u001b[39m=\u001b[39m\u001b[39mFalse\u001b[39;00m,\n\u001b[1;32m 52\u001b[0m initial\u001b[39m=\u001b[39m_NoValue, where\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m):\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
]
}
],
"source": [
"# storage_name = \"sqlite:///optuna.sql\"\n",
"storage_name = \"mysql+pymysql://root:Ch31121992@192.168.1.10:3306/optuna_db\"\n",
"study_name = \"Experiment 2\"\n",
"study = optuna.create_study(\n",
"    study_name=study_name,\n",
"    storage=storage_name,\n",
"    direction=\"minimize\",\n",
"    pruner=HyperbandPruner(),\n",
"    load_if_exists=True,\n",
")\n",
"study.optimize(\n",
"    objective,\n",
"    n_trials=None,\n",
"    timeout=None,\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
521
expt2.py
Normal file
@@ -0,0 +1,521 @@
# %% [markdown]
# <h1>Experiment 2</h1>
# <h3>Initial hyperparameter tuning</h3>
# <p>Summary</p>
# <ul>
# <li>A model was created with a dynamic constructor, allowing for a hyperparameter-driven model</li>
# <li>Hyperparameters were tuned using <code>`Optuna`</code></li>
# <li>The training loop was constructed using <code>`PyTorchLightning`</code></li>
# <li>The model was trained on a cluster of machines using a shared SQL trial database</li>
# <li>An extremely aggressive pruning algorithm was used to quickly narrow in on an optimal hyperparameter space</li>
# <li>Experiment 2 was left to train on the cluster for 2 days</li>
# </ul>

# %%
# Data handling imports
from dask.distributed import Client, LocalCluster
import dask
import dask.dataframe as dd
import dask.array as da
import numpy as np
import pickle
import random
from itertools import chain
from tqdm.auto import tqdm

# Deep learning imports
import torch
from torch.utils.data import DataLoader
from torch import nn
from torch.nn import functional as F
from torch import optim
import pytorch_lightning as pl
from pytorch_lightning import Trainer
import optuna
from optuna.pruners import HyperbandPruner
from optuna.integration import PyTorchLightningPruningCallback


# Suppress some warning messages from pytorch_lightning,
# It really doesn't like that I've forced it to handle a dask array!
import warnings

warnings.filterwarnings("ignore", category=UserWarning, module=pl.__name__)

# Also, set up a log to record debug messages for failed trials
import logging

logging.basicConfig(filename="debug.log", encoding="utf-8", level=logging.ERROR)

# %% [markdown]
# <h3>Patching PyTorchLightning</h3>
# <p>
# A key part of this project was to develop a patch for PyTorchLightning to allow for the use of <code>`dask`</code> arrays as inputs. It was important that PyTorchLightning be able to accept <code>`dask`</code> arrays and only load the data into memory when needed; otherwise, our extremely large datasets would simply crash our system, as they are significantly larger than the available RAM and VRAM.
# </p><p>
# After several versions of the patch, this final version was developed. It is a simple monkey patch that wraps the <code>pytorch_lightning.utilities.data._extract_batch_size</code> generator with a check that mimics the expected behaviour for torch tensors when given a dask array, and extends its type signature to ensure static analysis is still possible.
# </p><p>
# With this patch applied, the forward method in our model can accept a dask array and only compute each chunk of the array when needed. This allows us to train our model on datasets that are significantly larger than the available memory. A quick sanity check of the patched behaviour follows the patch below.
# </p>

# %%
# Monkey patch to allow pytorch lightning to accept a dask array as a model input
from typing import Any, Generator, Iterable, Mapping, Optional, Union

BType = Union[da.Array, torch.Tensor, str, Mapping[Any, "BType"], Iterable["BType"]]

unpatched = pl.utilities.data._extract_batch_size


def patch(batch: BType) -> Generator[Optional[int], None, None]:
    if isinstance(batch, da.core.Array):
        if len(batch.shape) == 0:
            yield 1
        else:
            yield batch.shape[0]
    else:
        yield from unpatched(batch)


pl.utilities.data._extract_batch_size = patch
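
# %% [markdown]
# <p>
# As a quick, hypothetical sanity check of the patched behaviour (not part of the
# original script): a 2D dask array should now report its leading dimension as its
# batch size, exactly as a torch tensor would.
# </p>

# %%
# Hypothetical sanity check for the monkey patch above
assert next(pl.utilities.data._extract_batch_size(da.zeros((8, 5)))) == 8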

# %%
# Set the device to use with torch
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


# Prepare a dask cluster and client
def create_client():
    cluster = LocalCluster(n_workers=2, threads_per_worker=1)
    client = Client(cluster)
    return client


if __name__ == "__main__":
    client = create_client()

# %%
# Load X and y for training
samples = list(range(1, 82))

with open("sample_X.pkl", "rb") as f:
    X = pickle.load(f)

with open("sample_y.pkl", "rb") as f:
    y = pickle.load(f)

# %% [markdown]
# <h3>Dataset Splitting</h3>
# <p>The dataset is split into a training and validation dataset (80:20 split). Because the number of available samples is extremely small, we haven't produced a test dataset. In the future, as more data is obtained, a test set should be included whenever possible.</p>

# %%
# Separate samples into training and validation sets
val_samples = random.sample(samples, k=len(samples) // 5)
train_samples = [s for s in samples if s not in val_samples]

X_train = {i: X[i] for i in train_samples}
X_val = {i: X[i] for i in val_samples}
y_train = {i: y[i] for i in train_samples}
y_val = {i: y[i] for i in val_samples}

# %% [markdown]
# <h3>Dataset Collation</h3>
# <p>This function collates our data in a torch DataLoader. The use of a DataLoader allows us to shuffle and prefetch data, reducing overfitting and maximising performance, as IO will be a bottleneck. In earlier experiments the collation function was dynamically constructed as a closure, allowing us to select the outputs we train against; for this experiment, however, we will match against all outputs for simplicity.</p>

# %%
# Collation function for the torch DataLoaders
def collate_fn(batch):
    X0 = batch[0][0][0].to_numpy(dtype=np.float32)[0]
    X1 = batch[0][0][1].to_dask_array(lengths=True)
    y = batch[0][1].to_numpy(dtype=np.float32)
    return (
        torch.from_numpy(X0).to(device),
        X1,
        torch.from_numpy(y).to(device),
    )


# %% [markdown]
# <h3>Convolutional Data Compression</h3>
# <p>
# The <code>`DaskCompression`</code> module accepts a dask array and applies a convolutional kernel to it to significantly compress the input data. This allows us to transform a larger-than-VRAM dataset into one that can fit on our GPU, and (hopefully) retain the relevant information to train the rest of our model on.
# </p><p>
# Note how each kernel is only computed inside <code>`compress_kernel`</code> (via <code>`kernel.compute()`</code>) and is immediately compressed via convolution. This ensures that only one kernel needs to be stored in memory at a time, avoiding the need to hold the entire dataset in memory at once.
# </p>

# %%
class DaskCompression(nn.Module):
    def __init__(
        self, in_channels, out_channels, kernel_size, chunk_size=1, device=device
    ):
        super().__init__()
        self.kernel_size = kernel_size
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.chunk_size = chunk_size
        self.device = device
        self.conv = nn.Conv1d(in_channels, out_channels, kernel_size).to(device)

    def compress_kernel(self, kernel):
        return (
            self.conv(torch.from_numpy(kernel.compute()).to(self.device))
            .squeeze()
            .to("cpu")  # return to cpu to save VRAM
        )

    def forward(self, x):
        # Precompute the dimensions of the output array
        dim0, dim2 = x.shape
        assert dim2 == self.in_channels
        dim0 = (dim0 // self.kernel_size) // self.chunk_size
        x = x.reshape(dim0, self.chunk_size, self.kernel_size, dim2)
        x = da.transpose(x, axes=(0, 1, 3, 2))

        x = [self.compress_kernel(kernel) for kernel in x]
        return torch.stack(x).to(self.device)
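

# %% [markdown]
# <p>
# As a quick, hypothetical shape check (not part of the original experiment): two chunks
# of three kernels of width four, with five input channels, compress down to a tensor of
# shape <code>(n_chunks, chunk_size, out_channels)</code>.
# </p>

# %%
# Hypothetical shape check for DaskCompression, pinned to the CPU for simplicity
_comp = DaskCompression(5, 5, kernel_size=4, chunk_size=3, device=torch.device("cpu"))
_x = da.zeros((2 * 3 * 4, 5), chunks=(4, 5), dtype=np.float32)
assert _comp(_x).shape == (2, 3, 5)  # (n_chunks, chunk_size, out_channels)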


# %% [markdown]
# <h3>Model Design</h3>
# <p>
# The model was designed to be a dynamically constructed, hyperparameter-driven model for ease of hyperparameter optimisation. The constructed model will process data in the following way:
# </p>
# <ol>
# <li>The input is left/right padded to a multiple of the compressor kernel size</li>
# <li>The dask array is compressed by a <code>`DaskCompression`</code> layer, treating each input as a channel</li>
# <li>The compressed array is then recursively convolved down to a size less than or equal to the width of our feedforward network</li>
# <li>The channels of the now convolved data are combined</li>
# <li>The combined, flattened data is then left/right padded to the width of the feedforward network</li>
# <li>Finally, the data is fed into a feedforward network</li>
# </ol>
# <p>
# This relatively simple design allows the network to accept both larger-than-RAM datasets as inputs, and datasets of variable sizes. This makes it suitable for training on whole Aconity datasets, without the need for culling or binning.
# </p>

# %%
class Model(pl.LightningModule):
    def __init__(
        self,
        # pl attributes
        optimizer=torch.optim.Adam,
        optimizer_args=(),
        optimizer_kwargs={},
        scheduler=None,
        scheduler_kwargs={},
        loss=torch.nn.MSELoss(),
        train_ds=None,
        val_ds=None,
        # model args & kwargs
        compressor_kernel_size=128,
        compressor_chunk_size=128,
        compressor_act=(nn.ReLU, (), {}),
        conv_kernel_size=128,
        conv_norm=False,
        conv_act=(nn.ReLU, (), {}),
        channel_combine_act=(nn.ReLU, (), {}),
        param_ff_depth=4,
        param_ff_width=16,
        param_ff_act=(nn.ReLU, (), {}),
        ff_width=512,
        ff_depth=4,
        ff_act=(nn.ReLU, (), {}),
        out_size=6,
        out_act=(nn.ReLU, (), {}),
    ):
        super().__init__()
        # Assign necessary attributes for pl model
        self.optimizer = optimizer
        self.optimizer_args = optimizer_args
        self.optimizer_kwargs = optimizer_kwargs
        self.scheduler = scheduler
        self.scheduler_kwargs = scheduler_kwargs
        self.loss = loss
        self.train_ds = train_ds
        self.val_ds = val_ds
        # Attrs for dynamically created model to be tested
        self.compressor_kernel_size = compressor_kernel_size
        self.compressor_chunk_size = compressor_chunk_size
        self.conv_kernel_size = conv_kernel_size
        self.ff_width = ff_width
        self.ff_depth = ff_depth
        self.out_size = out_size
        # layers
        # compressor compresses and converts dask array to torch tensor
        self.convolutional_compressor = DaskCompression(
            5,
            5,
            kernel_size=compressor_kernel_size,
            chunk_size=compressor_chunk_size,
        )
        self.compressor_act = compressor_act[0](*compressor_act[1], **compressor_act[2])
        # convolutional layer recursively applies convolutions to the compressed input
        self.conv = nn.Conv1d(5, 5, kernel_size=conv_kernel_size)
        self.conv_norm = nn.LocalResponseNorm(5) if conv_norm else nn.Identity()
        self.conv_act = conv_act[0](*conv_act[1], **conv_act[2])
        self.combine_channels = nn.Conv1d(5, 1, kernel_size=1)
        self.channel_combine_act = channel_combine_act[0](
            *channel_combine_act[1], **channel_combine_act[2]
        )
        self.param_ff = nn.Sequential(
            nn.Linear(4, param_ff_width),
            param_ff_act[0](*param_ff_act[1], **param_ff_act[2]),
            *chain(
                *(
                    (
                        nn.Linear(param_ff_width, param_ff_width),
                        param_ff_act[0](*param_ff_act[1], **param_ff_act[2]),
                    )
                    for _ in range(param_ff_depth)
                )
            ),
        )
        self.ff = nn.Sequential(
            nn.Linear(ff_width + param_ff_width, ff_width),
            ff_act[0](*ff_act[1], **ff_act[2]),
            *chain(
                *(
                    (
                        nn.Linear(ff_width, ff_width),
                        ff_act[0](*ff_act[1], **ff_act[2]),
                    )
                    for _ in range(ff_depth)
                )
            ),
        )
        self.out_dense = nn.Linear(ff_width, out_size)
        self.out_act = out_act[0](*out_act[1], **out_act[2])

    @staticmethod
    def pad_ax0_to_multiple_of(x, multiple_of):
        padding = (((x.shape[0] // multiple_of) + 1) * multiple_of) - x.shape[0]
        left_pad = padding // 2
        right_pad = padding - left_pad
        return da.pad(
            x, ((left_pad, right_pad), (0, 0)), mode="constant", constant_values=0
        )

    def pad_to_ff_width(self, x):
        padding = self.ff_width - x.shape[1]
        left_pad = padding // 2
        right_pad = padding - left_pad
        return F.pad(
            x,
            (right_pad, left_pad, 0, 0),
            mode="constant",
            value=0.0,
        )

    def forward(self, x0, x1):
        # pad to a multiple of kernel_size * chunk_size
        x1 = self.pad_ax0_to_multiple_of(
            x1, self.compressor_kernel_size * self.compressor_chunk_size
        )
        x1 = self.convolutional_compressor(x1)
        x1 = x1.reshape(x1.shape[0] * x1.shape[1], x1.shape[2]).T.unsqueeze(0)
        while x1.shape[2] > self.ff_width:
            x1 = self.conv(x1)
            x1 = self.conv_norm(x1)
            x1 = self.conv_act(x1)
        x1 = self.combine_channels(x1)
        x1 = self.channel_combine_act(x1)
        x1 = x1.squeeze(1)
        x1 = self.pad_to_ff_width(x1)
        x0 = x0.unsqueeze(0)
        x0 = self.param_ff(x0)
        x = torch.cat((x1, x0), dim=1)
        x = self.ff(x)
        x = self.out_dense(x)
        x = self.out_act(x)
        return x

    def configure_optimizers(self):
        optimizer = self.optimizer(
            self.parameters(), *self.optimizer_args, **self.optimizer_kwargs
        )
        if self.scheduler is not None:
            scheduler = self.scheduler(optimizer, **self.scheduler_kwargs)
            # Lightning expects the dict format here; schedulers like
            # ReduceLROnPlateau also need a metric to monitor
            return {
                "optimizer": optimizer,
                "lr_scheduler": {"scheduler": scheduler, "monitor": "val_loss"},
            }
        else:
            return optimizer

    def train_dataloader(self):
        return self.train_ds

    def val_dataloader(self):
        return self.val_ds

    def training_step(self, batch, batch_idx):
        x0, x1, y = batch
        y_hat = self(x0, x1)
        loss = self.loss(y_hat, y)
        self.log("train_loss", loss)
        return loss

    def validation_step(self, batch, batch_idx):
        x0, x1, y = batch
        y_hat = self(x0, x1)
        loss = self.loss(y_hat, y)
        self.log("val_loss", loss)
        return loss


# %% [markdown]
# <h3>Activation Functions</h3>
# <p>
# For our hyperparameter optimisation, we intend to test all the activation functions in PyTorch. In addition to the builtin activations, we will also train using the following custom implemented activation functions from literature or our own design (a minimal sketch of one such module is given after the list):
# </p>
# <ol>
# <li><b><code>BMU</code>:</b> Bio-Mimicking Unit; an activation function designed to mimic the activation potential of a biological neuron.</li>
# <li><b><code>SoftExp</code>:</b> Soft Exponential function; a parametric activation function that fits to a wide variety of exponential curves (DOI: <a href=https://arxiv.org/abs/1602.01321v1>10.48550/arXiv.1602.01321</a>)</li>
# <li><b><code>LeakyPReQU</code>:</b> Leaky Parametric Rectified Quadratic Unit; a smoothly and continuously differentiable function that is a parametrically sloped line for <code>x≤0</code> and a quadratic curve for <code>x>0</code></li>
# <li><b><code>ISRU</code>:</b> Inverse Square Root Unit; a somewhat uncommon function that can be useful in models such as this as it yields a continuously differentiable curve while being extremely fast to compute using bit manipulation</li>
# <li><b><code>ISRLU</code>:</b> Inverse Square Root Linear Unit; a modified ISRU that is an ISRU for <code>x<0</code> and <code>`f(x)=x`</code> for <code>x≥0</code> (DOI: <a href=https://arxiv.org/abs/1710.09967>10.48550/arXiv.1710.09967</a>)</li>
# <li><b><code>PBessel</code>:</b> Parametric Bessel; a parametric Bessel curve yielding various different wave formations depending on a trainable parameter</li>
# <li><b><code>Sinusoid</code>:</b> A parametric sine wave, with amplitude and wavelength as trainable parameters</li>
# <li><b><code>Modulo</code>:</b> A parametric sawtooth wave, <code>`f(x)=x%ɑ`</code> where ɑ is a trainable parameter</li>
# <li><b><code>TriWave</code>:</b> A parametric triangle wave, with amplitude and wavelength as trainable parameters</li>
# <li><b><code>Gaussian</code>:</b> A parametric Gaussian curve, with trainable amplitude</li>
# </ol>
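
# %% [markdown]
# <p>
# For illustration, a minimal sketch of what one of these custom modules might look like
# (the committed implementations live in <code>`custom_activations.py`</code> and may
# differ in detail):
# </p>

# %%
class ISRUSketch(nn.Module):
    """Hypothetical sketch of an Inverse Square Root Unit: f(x) = x / sqrt(1 + a*x^2)."""

    def __init__(self, alpha=1.0):
        super().__init__()
        # alpha is trainable, mirroring the parametric activations described above
        self.alpha = nn.Parameter(torch.tensor(float(alpha)))

    def forward(self, x):
        return x / torch.sqrt(1.0 + self.alpha * x * x)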

# %%
# Create a dispatcher of builtin activations and
# several custom activations from experimentation or literature
from custom_activations import SoftExp, PBessel


activation_dispatcher = {
    "Tanh": nn.Tanh,
    "SiLU": nn.SiLU,
    "Softplus": nn.Softplus,
    "SoftExp": SoftExp,
    "PBessel": PBessel,
}

out_act_dispatcher = {
    "Sigmoid": (nn.Sigmoid, (), {}),
    "Softmax": (nn.Softmax, (), {"dim": 1}),
}

# %% [markdown]
# <h3>Hyperparameter Training</h3>
# <p>Here, we define an objective function, describing what we want Optuna to do during each trial and how to react to various errors and/or situations that may arise. To summarise the objective:</p>
# <ul>
# <li>Optuna selects hyperparameters for all input parameters within the given constraints</li>
# <li>A model is generated using the selected hyperparameters</li>
# <li>PyTorchLightning trains the model through 2 epochs</li>
# <li>The model is evaluated on the validation set</li>
# <li>The validation loss is returned to Optuna</li>
# </ul>
# <p>
# Optuna monitors the reported validation loss and attempts to minimise it. An extremely aggressive pruning strategy known as "hyperband pruning" is used to efficiently reduce the parameter space to something more reasonable. Any parameter set which Optuna deems suboptimal will be immediately pruned or even stopped early to save time.
# </p>

# %%
# Test parameters
n_epochs = 2
output_keys = list(next(iter(y_train.values())).keys())
activation_vals = list(activation_dispatcher.keys())
out_act_vals = list(out_act_dispatcher.keys())


# Next we define the objective function for the hyperparameter optimization
def objective(trial):
    torch.cuda.empty_cache()
    objective_value = torch.inf
    model = None
    logger = None
    try:
        # Select hyperparameters for testing
        compressor_kernel_size = 128
        compressor_chunk_size = 128
        compressor_act = (SoftExp, (), {})
        conv_kernel_size = 128
        conv_norm = False
        conv_act = (nn.Tanh, (), {})
        channel_combine_act = (nn.Softplus, (), {})
        param_ff_depth = 2
        param_ff_width = 16
        param_ff_act = (PBessel, (), {})
        ff_width = 1024
        ff_depth = 4
        ff_act = (nn.Softplus, (), {})
        out_size = 2
        out_act = out_act_dispatcher[trial.suggest_categorical("out_act", out_act_vals)]

        # Set up the model architecture and other necessary components
        model = Model(
            compressor_kernel_size=compressor_kernel_size,
            compressor_chunk_size=compressor_chunk_size,
            compressor_act=compressor_act,
            conv_kernel_size=conv_kernel_size,
            conv_act=conv_act,
            conv_norm=conv_norm,
            channel_combine_act=channel_combine_act,
            param_ff_depth=param_ff_depth,
            param_ff_width=param_ff_width,
            param_ff_act=param_ff_act,
            ff_width=ff_width,
            ff_depth=ff_depth,
            ff_act=ff_act,
            out_size=out_size,
            out_act=out_act,
        ).to(device)

        trainer = Trainer(
            accelerator="gpu",
            max_epochs=n_epochs,
            devices=1,
            logger=logger,
            num_sanity_val_steps=0,  # Needs to be disabled or else we get an error because X is dask array
            # precision="16-mixed",
            callbacks=[
                PyTorchLightningPruningCallback(trial, monitor="val_loss"),
            ],
        )
        # Prepare datasets
        train = DataLoader(
            list(zip(X_train.values(), y_train.values())),
            collate_fn=collate_fn,
            shuffle=True,
        )
        valid = DataLoader(
            list(zip(X_val.values(), y_val.values())),
            shuffle=True,
            collate_fn=collate_fn,
        )
        # Finally, train the model
        trainer.fit(model, train, valid)
        # Record the final validation loss so completed trials report a result
        objective_value = trainer.callback_metrics["val_loss"].item()
    except Exception as e:
        logging.exception(f"An exception occurred in trial {trial.number}: {e}")
        raise optuna.exceptions.TrialPruned()
    finally:
        if logger is not None:
            logger.experiment.unwatch(model)
            logger.experiment.finish()
        del model
        torch.cuda.empty_cache()
    if objective_value == torch.inf:
        raise optuna.exceptions.TrialPruned()
    return objective_value
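

# %% [markdown]
# <p>
# Note that most of the hyperparameters above are pinned to values chosen from earlier
# trials, with only <code>`out_act`</code> still being sampled. In earlier runs of this
# objective (see the interrupted trace recorded in <code>`expt2.ipynb`</code>) more of the
# space was sampled, along the lines of the hypothetical sketch below.
# </p>

# %%
# Hypothetical sketch (not the committed objective) of how the earlier runs sampled
# a wider search space, reconstructed from the interrupted trace in expt2.ipynb
def suggest_wider_space(trial):
    in_act = (
        activation_dispatcher[trial.suggest_categorical("in_act", activation_vals)],
        (),
        {},
    )
    compressor_kernel_size = trial.suggest_int("compressor_kernel_size", 64, 257, 64)
    return in_act, compressor_kernel_size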


# %% [markdown]
# <h3>Hyperparameter Optimisation on a Computing Cluster</h3>
# <p>
# The final important step is to run the optimisation using a cluster of computers to maximise the number of trials that can be run in parallel. Although this could be achieved using a more complex, scheduler-controlled system built on dask, we will use the far simpler approach of a shared SQL ledger to keep track of the trials and their results. This approach is sufficient for our purposes and easy to implement. Using it, the model was trained on a cluster of 5 computers at once.
# </p>

# %%
if __name__ == "__main__":
    # storage_name = "sqlite:///optuna.sql"
    storage_name = "mysql+pymysql://root:Ch31121992@192.168.1.10:3306/optuna_db"
    study_name = "Composition Experiment 2"
    study = optuna.create_study(
        study_name=study_name,
        storage=storage_name,
        direction="minimize",
        pruner=HyperbandPruner(),  # hyperband pruning, as described above
        load_if_exists=True,
    )
    study.optimize(
        objective,
        n_trials=10,
        timeout=None,
    )
7024
expt2_analysis.ipynb
Normal file
File diff suppressed because it is too large
398
expt3.ipynb
Normal file
@@ -0,0 +1,398 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<h1>Experiment 3</h1>\n",
"<h3>Optimised model training</h3>\n",
"<p>In experiment 3 the model was trained using the optimised hyperparameters. By examining the results of expt2, it was noticed that trials #1, #10, and #16 all resulted in quite low losses while also showing clearly discernible downward training curves. Of these, trial #16 was ultimately selected as the model to be tested, as the data suggests that <code>`in_act=Mish`</code> tends to give the lowest losses in most models tested. The parameters for trial #16 were as follows:</p>\n",
"<ul>\n",
"<li><b>in_act</b> = Mish</li>\n",
"<li><b>compressor_kernel_size</b> = 128</li>\n",
"<li><b>compressor_chunk_size</b> = 128</li>\n",
"<li><b>compressor_act</b> = SoftExp</li>\n",
"<li><b>conv_kernel_size</b> = 128</li>\n",
"<li><b>conv_act</b> = Sigmoid</li>\n",
"<li><b>channel_combine_act</b> = GELU</li>\n",
"<li><b>ff_width</b> = 512</li>\n",
"<li><b>ff_depth</b> = 2</li>\n",
"<li><b>ff_act</b> = CELU</li>\n",
"<li><b>out_act</b> = Tanhshrink</li>\n",
"</ul>\n",
"<p>\n",
"Because most of the training curves in expt2 appeared to be unstable, a learning rate scheduler was used to reduce the learning rate by 20% if the validation loss did not improve for 5 epochs. The model was checkpointed, with the best 10 iterations of the model being retained for testing after training.\n",
"</p>\n",
"<h3>Modified optimal model training</h3>\n",
"<p>\n",
"Following the first attempt at training the optimised model (Model 1, Test 1), it was noted that training curves were clearly discernible, but still quite unstable and noisy. To try and further improve the stability of the training, a modified version of the model was prepared and trained (Model 2, Test 2). The modified model was the same as Model 1, but with the addition of a LayerNormalization layer to the convolutional layer of the <code>`DaskCompression`</code> submodule. This change was made because highly recurrent submodules such as the compressor are known to be especially prone to instability caused by vanishing or exploding gradients. It was reasoned that by normalizing at each iteration the gradients would be less likely to vanish or explode, making the training more stable.\n",
"</p>"
]
},
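{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hypothetical sketch of the Model 2 change described above; the committed\n",
"# implementation may differ in detail, and importing DaskCompression from\n",
"# expt1 is an assumption for illustration.\n",
"from expt1 import DaskCompression\n",
"from torch import nn\n",
"\n",
"\n",
"class NormalisedDaskCompression(DaskCompression):\n",
"    def __init__(self, *args, **kwargs):\n",
"        super().__init__(*args, **kwargs)\n",
"        # Normalise the features of each compressed kernel to stabilise the\n",
"        # highly recurrent compression loop\n",
"        self.norm = nn.LayerNorm(self.out_channels)\n",
"\n",
"    def compress_kernel(self, kernel):\n",
"        return self.norm(super().compress_kernel(kernel))"
]
},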
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# Data handling imports\n",
"from dask.distributed import Client, LocalCluster\n",
"import dask\n",
"import dask.dataframe as dd\n",
"import dask.array as da\n",
"import numpy as np\n",
"import pickle\n",
"import random\n",
"from itertools import chain\n",
"from tqdm.auto import tqdm\n",
"\n",
"# Deep learning imports\n",
"import torch\n",
"from torch.utils.data import DataLoader\n",
"from torch import nn\n",
"from torch.nn import functional as F\n",
"from torch import optim\n",
"import pytorch_lightning as pl\n",
"from pytorch_lightning import Trainer\n",
"from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint\n",
"from pytorch_lightning.loggers import WandbLogger\n",
"\n",
"# Suppress some warning messages from pytorch_lightning,\n",
"# It really doesn't like that I've forced it to handle a dask array!\n",
"import warnings\n",
"\n",
"warnings.filterwarnings(\"ignore\", category=UserWarning, module=pl.__name__)\n",
"\n",
"# Also, set up a log to record debug messages for failed trials\n",
"import logging\n",
"\n",
"logging.basicConfig(filename=\"debug.log\", encoding=\"utf-8\", level=logging.DEBUG)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from expt1 import (\n",
"    Model,\n",
"    device,\n",
"    X_train,\n",
"    y_train,\n",
"    X_val,\n",
"    y_val,\n",
"    create_collate_fn,\n",
")\n",
"from custom_activations import SoftExp"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/cianh/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/distributed/node.py:182: UserWarning: Port 8787 is already in use.\n",
"Perhaps you already have a cluster running?\n",
"Hosting the HTTP server on port 34477 instead\n",
"  warnings.warn(\n"
]
}
],
"source": [
"cluster = LocalCluster(n_workers=8, threads_per_worker=1)\n",
"client = Client(cluster)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Monkey patch to allow pytorch lightning to accept a dask array as a model input\n",
"from typing import Any, Generator, Iterable, Mapping, Optional, Union\n",
"\n",
"BType = Union[da.Array, torch.Tensor, str, Mapping[Any, \"BType\"], Iterable[\"BType\"]]\n",
"\n",
"unpatched = pl.utilities.data._extract_batch_size\n",
"\n",
"\n",
"def patch(batch: BType) -> Generator[Optional[int], None, None]:\n",
"    if isinstance(batch, da.core.Array):\n",
"        if len(batch.shape) == 0:\n",
"            yield 1\n",
"        else:\n",
"            yield batch.shape[0]\n",
"    else:\n",
"        yield from unpatched(batch)\n",
"\n",
"\n",
"pl.utilities.data._extract_batch_size = patch"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Prepare datasets\n",
"train = DataLoader(\n",
"    list(zip(X_train.values(), y_train.values())),\n",
"    collate_fn=create_collate_fn(),\n",
"    shuffle=True,\n",
")\n",
"valid = DataLoader(\n",
"    list(zip(X_val.values(), y_val.values())),\n",
"    shuffle=True,\n",
"    collate_fn=create_collate_fn(),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Set up the model architecture and other necessary components\n",
"model = Model(\n",
"    # Training parameters\n",
"    optimizer=optim.Adam,\n",
"    scheduler=optim.lr_scheduler.ReduceLROnPlateau,\n",
"    scheduler_kwargs={\"factor\": 0.8, \"patience\": 5},\n",
"    # Model parameters\n",
"    in_act=(nn.Mish, list(), dict()),\n",
"    compressor_kernel_size=128,\n",
"    compressor_chunk_size=128,\n",
"    compressor_act=(SoftExp, list(), dict()),\n",
"    conv_kernel_size=128,\n",
"    conv_act=(nn.Sigmoid, list(), dict()),\n",
"    channel_combine_act=(nn.GELU, list(), dict()),\n",
"    ff_width=512,\n",
"    ff_depth=2,\n",
"    ff_act=(nn.CELU, list(), dict()),\n",
"    out_size=len(list(next(iter(y_train.values())).keys())),\n",
"    out_act=(nn.Tanhshrink, list(), dict()),\n",
").to(device)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mchughes000\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "6d1624339b4c4aaeb195b5ebc3b3e69e",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"VBox(children=(Label(value='Waiting for wandb.init()...\\r'), FloatProgress(value=0.016669258750092317, max=1.0…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"wandb version 0.15.8 is available! To upgrade, please run:\n",
" $ pip install wandb --upgrade"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Tracking run with wandb version 0.15.7"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Run data is saved locally in <code>./wandb/run-20230801_233841-q70oibx2</code>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Syncing run <strong><a href='https://wandb.ai/chughes000/Aconity_ML_Test_DryRun/runs/q70oibx2' target=\"_blank\">Test 2</a></strong> to <a href='https://wandb.ai/chughes000/Aconity_ML_Test_DryRun' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
" View project at <a href='https://wandb.ai/chughes000/Aconity_ML_Test_DryRun' target=\"_blank\">https://wandb.ai/chughes000/Aconity_ML_Test_DryRun</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
" View run at <a href='https://wandb.ai/chughes000/Aconity_ML_Test_DryRun/runs/q70oibx2' target=\"_blank\">https://wandb.ai/chughes000/Aconity_ML_Test_DryRun/runs/q70oibx2</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"GPU available: True (cuda), used: True\n",
"TPU available: False, using: 0 TPU cores\n",
"IPU available: False, using: 0 IPUs\n",
"HPU available: False, using: 0 HPUs\n"
]
}
],
"source": [
"early_stop_callback = EarlyStopping(\n",
"    monitor=\"val_loss\", patience=15, verbose=False, mode=\"min\"\n",
")\n",
"\n",
"checkpoint_callback = ModelCheckpoint(\n",
"    monitor=\"val_loss\",\n",
"    dirpath=\"./checkpoints\",\n",
"    filename=\"checkpoint-{epoch:02d}-{val_loss:.2f}\",\n",
"    save_top_k=10,\n",
"    mode=\"min\",\n",
")\n",
"\n",
"logger = WandbLogger(project=\"Aconity_ML_Test_DryRun\", name=\"Test 1\")\n",
"logger.experiment.watch(model, log=\"all\", log_freq=1)\n",
"\n",
"trainer = Trainer(\n",
"    accelerator=\"gpu\",\n",
"    max_epochs=-1,\n",
"    devices=\"auto\",\n",
"    strategy=\"auto\",\n",
"    logger=logger,\n",
"    callbacks=[checkpoint_callback, early_stop_callback],\n",
"    num_sanity_val_steps=0,  # Needs to be disabled or else we get an error because X is dask array\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n",
"/home/cianh/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/pytorch_lightning/core/optimizer.py:361: RuntimeWarning: Found unsupported keys in the optimizer configuration: {'scheduler'}\n",
"  rank_zero_warn(\n",
"\n",
"   | Name                     | Type            | Params\n",
"--------------------------------------------------------------\n",
"0  | loss                     | MSELoss         | 0     \n",
"1  | in_act                   | Mish            | 0     \n",
"2  | convolutional_compressor | DaskCompression | 3.2 K \n",
"3  | compressor_act           | SoftExp         | 1     \n",
"4  | conv                     | Conv1d          | 3.2 K \n",
"5  | conv_act                 | Sigmoid         | 0     \n",
"6  | combine_channels         | Conv1d          | 6     \n",
"7  | channel_combine_act      | GELU            | 0     \n",
"8  | ff                       | Sequential      | 525 K \n",
"9  | out_dense                | Linear          | 11.8 K\n",
"10 | out_act                  | Tanhshrink      | 0     \n",
"--------------------------------------------------------------\n",
"543 K     Trainable params\n",
"0         Non-trainable params\n",
"543 K     Total params\n",
"2.174     Total estimated model params size (MB)\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "25d7ba2f5e3c4f68a55fdafed5a5b092",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Training: 0it [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Finally, train the model\n",
"trainer.fit(model, train, valid)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
101
expt3.py
Normal file
@@ -0,0 +1,101 @@
# From expt2 selected trials ???
# Data handling imports
from dask.distributed import Client, LocalCluster
import dask.array as da

# Deep learning imports
import torch
from torch.utils.data import DataLoader
from torch import nn
from torch import optim
import pytorch_lightning as pl
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
from pytorch_lightning.loggers import WandbLogger

from expt1 import (
    Model,
    device,
    X_train,
    y_train,
    X_val,
    y_val,
    collate_fn,
)
from custom_activations import SoftExp, PBessel

# Suppress some warning messages from pytorch_lightning,
# It really doesn't like that I've forced it to handle a dask array!
import warnings
import logging

warnings.filterwarnings("ignore", category=UserWarning, module=pl.__name__)
# Also, set up a log to record debug messages for failed trials
logging.basicConfig(filename="debug.log", encoding="utf-8", level=logging.ERROR)

if __name__ == "__main__":
    cluster = LocalCluster(n_workers=8, threads_per_worker=1)
    client = Client(cluster)


# Prepare datasets
train = DataLoader(
    list(zip(X_train.values(), y_train.values())),
    collate_fn=collate_fn,
    shuffle=True,
)
valid = DataLoader(
    list(zip(X_val.values(), y_val.values())),
    shuffle=True,
    collate_fn=collate_fn,
)

# Set up the model architecture and other necessary components
model = Model(
    # Training parameters
    optimizer=optim.Adam,
    # Model parameters
    compressor_kernel_size=128,
    compressor_chunk_size=128,
    compressor_act=(SoftExp, (), {}),
    conv_kernel_size=128,
    conv_act=(nn.Tanh, (), {}),
    conv_norm=False,
    channel_combine_act=(nn.Softplus, (), {}),
    param_ff_depth=2,
    param_ff_width=16,
    param_ff_act=(PBessel, (), {}),
    ff_width=1024,
    ff_depth=6,
    ff_act=(nn.Softplus, (), {}),
    out_size=2,
    out_act=(nn.Sigmoid, tuple(), dict()),
).to(device)

if __name__ == "__main__":
    early_stop_callback = EarlyStopping(
        monitor="val_loss", patience=15, verbose=False, mode="min"
    )

    checkpoint_callback = ModelCheckpoint(
        monitor="val_loss",
        dirpath="./checkpoints",
        filename="checkpoint-{epoch:02d}-{val_loss:.2f}",
        save_top_k=10,
        mode="min",
    )

    logger = WandbLogger(project="Aconity_ML_Expt1", name="Test 3")
    logger.experiment.watch(model, log="all", log_freq=1)

    trainer = Trainer(
        accelerator="gpu",
        max_epochs=-1,
        devices="auto",
        strategy="auto",
        logger=logger,
        callbacks=[checkpoint_callback, early_stop_callback],
        num_sanity_val_steps=0,  # Disabled or we get error because X is dask array
    )
    # Finally, train the model
    trainer.fit(model, train, valid)
1229
expt3_analysis.ipynb
Normal file
File diff suppressed because it is too large
1382
process_X_data.ipynb
Normal file
File diff suppressed because one or more lines are too long
213
process_y_data.ipynb
Normal file
@@ -0,0 +1,213 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import pickle"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [],
"source": [
"# Read the excel file\n",
"doe_df = pd.read_excel(\n",
"    \"data/NiTi_Cubes_Analysis.xlsx\",\n",
"    sheet_name=\"DOE & RSPNS\",\n",
"    header=1,\n",
"    usecols=\"A:M, T:AC\",\n",
"    nrows=81,\n",
")\n",
"# Remove newlines from column names\n",
"doe_df.rename(\n",
"    mapper=dict(zip(doe_df.keys(), (k.replace(\"\\n\", \" \") for k in doe_df.keys()))),\n",
"    axis=1,\n",
"    inplace=True,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"doe_df"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
"# Split the dataframe into a dictionary of dataframes, one for each sample\n",
"sample_y = dict(iter(doe_df.groupby(\"Sample\")))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sample_y[1]"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [],
"source": [
"# Finally, pickle this data for use in experiments\n",
"with open(\"sample_y.pkl\", \"wb\") as f:\n",
"    pickle.dump(sample_y, f)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
"    .dataframe tbody tr th:only-of-type {\n",
"        vertical-align: middle;\n",
"    }\n",
"\n",
"    .dataframe tbody tr th {\n",
"        vertical-align: top;\n",
"    }\n",
"\n",
"    .dataframe thead th {\n",
"        text-align: right;\n",
"    }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
"  <thead>\n",
"    <tr style=\"text-align: right;\">\n",
"      <th></th>\n",
"      <th>Sample</th>\n",
"      <th>Laser power, P\\n(W)</th>\n",
"      <th>Scan speed, V\\n(mm/sec)</th>\n",
"      <th>Spot size, F\\n(µm)</th>\n",
"      <th>Hatch spacing, H\\n(µm)</th>\n",
"      <th>Surface Energy Density @ 90µm Layer thickness, El (J/mm2)</th>\n",
"      <th>Surface Energy Density @ Spot size, EF (J/mm2)</th>\n",
"      <th>Vol. Energy Density @ Hatch Spacing, VEDH (J/mm3)</th>\n",
"      <th>Vol. Energy Density @ Spot Size, VEDF (J/mm3)</th>\n",
"      <th>Density\\n(Archimedes by Acetone)</th>\n",
"      <th>...</th>\n",
"      <th>Ni</th>\n",
"      <th>Ti</th>\n",
"      <th>Oxygen</th>\n",
"      <th>Carbon</th>\n",
"      <th>Ni (Norm)</th>\n",
"      <th>Ti (Norm)</th>\n",
"      <th>Sa (um)</th>\n",
"      <th>Sku</th>\n",
"      <th>Ssk</th>\n",
"      <th>Sz (um)</th>\n",
"    </tr>\n",
"  </thead>\n",
"  <tbody>\n",
"    <tr>\n",
"      <th>0</th>\n",
"      <td>1</td>\n",
"      <td>180</td>\n",
"      <td>1000</td>\n",
"      <td>40</td>\n",
"      <td>40</td>\n",
"      <td>2.0</td>\n",
"      <td>4.5</td>\n",
"      <td>50.0</td>\n",
"      <td>50.0</td>\n",
"      <td>6.343695</td>\n",
"      <td>...</td>\n",
"      <td>41.33</td>\n",
"      <td>43.76</td>\n",
"      <td>1.1</td>\n",
"      <td>13.81</td>\n",
"      <td>48.5721</td>\n",
"      <td>51.4279</td>\n",
"      <td>18.686</td>\n",
"      <td>3.243</td>\n",
"      <td>0.28</td>\n",
"      <td>187.116</td>\n",
"    </tr>\n",
"  </tbody>\n",
"</table>\n",
"<p>1 rows × 23 columns</p>\n",
"</div>"
],
"text/plain": [
"   Sample  Laser power, P\\n(W)  Scan speed, V\\n(mm/sec)  Spot size, F\\n(µm)  \\\n",
"0       1                  180                     1000                  40   \n",
"\n",
"   Hatch spacing, H\\n(µm)  \\\n",
"0                      40   \n",
"\n",
"   Surface Energy Density @ 90µm Layer thickness, El (J/mm2)  \\\n",
"0                                                2.0           \n",
"\n",
"   Surface Energy Density @ Spot size, EF (J/mm2)  \\\n",
"0                                             4.5   \n",
"\n",
"   Vol. Energy Density @ Hatch Spacing, VEDH (J/mm3)  \\\n",
"0                                               50.0   \n",
"\n",
"   Vol. Energy Density @ Spot Size, VEDF (J/mm3)  \\\n",
"0                                           50.0   \n",
"\n",
"   Density\\n(Archimedes by Acetone)  ...     Ni     Ti  Oxygen  Carbon  \\\n",
"0                          6.343695  ...  41.33  43.76     1.1   13.81   \n",
"\n",
"   Ni (Norm)  Ti (Norm)  Sa (um)    Sku   Ssk  Sz (um)  \n",
"0    48.5721    51.4279   18.686  3.243  0.28  187.116  \n",
"\n",
"[1 rows x 23 columns]"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sample_y[1]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
53
pyproject.toml
Normal file
@@ -0,0 +1,53 @@
[tool.poetry]
name = "aconity-ml-test"
version = "0.1.0"
description = ""
authors = ["Cian Hughes <cian.hughes@dcu.ie>"]
readme = "README.md"
# packages = [{include = "aconity_ml_test"}]

[tool.poetry.dependencies]
python = ">=3.9,<3.12"
read_layers = { file = "../MTPy/wheels/read_layers-0.1.0-cp311-cp311-manylinux_2_34_x86_64.whl" }
dask = { extras = ["distributed", "graphviz"], version = "*" }
pytorch-lightning = "^2.0.6"
xgboost = "^1.7.6"
optuna = "^3.2.0"
wandb = "^0.15.7"
numba = "^0.57.1"
tqdm = "^4.65.0"
matplotlib = "^3.7.2"
plotly = "^5.15.0"
bokeh = "^3.2.1"
holoviews = "^1.17.0"
datashader = "^0.15.1"
psutil = "^5.9.5"
pandas = "^2.0.3"
tables = "^3.8.0"
lz4 = "^4.3.2"
openpyxl = "^3.1.2"
odfpy = "^1.4.1"
fsspec = "^2023.6.0"
jupyterlab = "^4.0.3"
jupyter = "^1.0.0"
ipywidgets = "^8.0.7"
pyarrow = "^12.0.1"
jupyter-bokeh = "^3.0.7"
torch = { file = "./wheel/torch-2.0.1+cu118-cp311-cp311-linux_x86_64.whl" }
optuna-dashboard = "^0.10.3"
pymysql = "^1.1.0"
mysqlclient = "^2.2.0"
tabulate = "^0.9.0"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

[[tool.poetry.source]]
name = "PyPI"
priority = "primary"

# [[tool.poetry.source]]
# name = "nvidia"
# url = "https://pypi.ngc.nvidia.com"
# priority = "primary"