mirror of https://github.com/Cian-H/Aconity_ML_Expt1.git, synced 2025-12-23 05:01:58 +00:00
Initial Commit
360
act_plot.ipynb
Normal file
File diff suppressed because one or more lines are too long
274
custom_activations.py
Normal file
@@ -0,0 +1,274 @@
import torch
from torch import nn


class SoftExp(nn.Module):
    """
    Implementation of the soft exponential activation.

    Shape:
        - Input: (N, *) where * means any number of additional
          dimensions
        - Output: (N, *), same shape as the input

    Parameters:
        - alpha - trainable parameter

    References:
        - See related paper:
          https://arxiv.org/pdf/1602.01321.pdf

    Examples:
        >>> a1 = SoftExp()
        >>> x = torch.randn(256)
        >>> x = a1(x)
    """

    def __init__(self, alpha=None, beta=None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if "device" in kwargs:
            self.device = kwargs["device"]
        else:
            self.device = torch.device("cpu")
        alpha = (
            torch.tensor(alpha) if alpha is not None else nn.Parameter(torch.randn(1))
        )
        self.alpha = alpha.to(self.device)
        self.alpha.requires_grad = True  # ensure alpha is trainable
        self.__name__ = "SoftExp"

    def forward(self, x):
        """
        Forward pass of the function.
        Applies the function to the input elementwise.
        """
        if self.alpha == 0.0:
            return x

        if self.alpha < 0.0:
            return -torch.log(1 - self.alpha * (x + self.alpha)) / self.alpha

        if self.alpha > 0.0:
            return (torch.exp(self.alpha * x) - 1) / self.alpha + self.alpha


@torch.jit.script
def sech(x):
    return 1 / torch.cosh(x)


@torch.jit.script
def dip(x):
    return (-2.0261193218831233 * sech(x)) + 0.31303528549933146


@torch.jit.script
def bmu(x):
    return torch.where(
        x <= -1,
        -1 / torch.abs(x),
        torch.where(x >= 1, x - 2, dip(x)),
    )


class BMU(nn.Module):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def forward(self, input):
        return bmu(input)


class TrainableHybrid(nn.Module):
    def __init__(
        self, functions, function_args=None, function_kwargs=None, *args, **kwargs
    ):
        super().__init__(*args, **kwargs)
        if function_args is None:
            function_args = [tuple() for _ in functions]
        if function_kwargs is None:
            function_kwargs = [dict() for _ in functions]
        if None in function_args:
            function_args = [
                tuple() if fa is None else fa for fa in function_args
            ]
        if None in function_kwargs:
            function_kwargs = [
                dict() if fk is None else fk for fk in function_kwargs
            ]
        self.functions = [
            f(*fa, **fk) for f, fa, fk in zip(functions, function_args, function_kwargs)
        ]
        self.alpha = nn.Parameter(torch.randn(len(functions)))
        self.normalize_alpha()
        self.__name__ = (
            f"TrainableHybrid{str([f.__name__ for f in functions]).replace(' ', '')}"
        )

    def __repr__(self):
        return self.__name__

    def normalize_alpha(self) -> None:
        self.alpha.data = self.alpha / torch.sum(self.alpha)

    def apply_activations(self, input: torch.Tensor):
        return torch.sum(
            torch.stack(
                [a * f(input) for f, a in zip(self.functions, self.alpha)]
            ),
            dim=0,
        )

    def forward(self, input: torch.Tensor) -> torch.Tensor:
        self.normalize_alpha()
        return self.apply_activations(input)

    def to(self, device):
        # self.functions is a plain list rather than an nn.ModuleList, so the
        # wrapped modules must be moved to the target device manually
        super().to(device)
        self.functions = [f.to(device) for f in self.functions]
        return self


class ISRU(nn.Module):
    def __init__(self, alpha=None, beta=None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if "device" in kwargs:
            self.device = kwargs["device"]
        else:
            self.device = torch.device("cpu")
        alpha = (
            torch.tensor(alpha) if alpha is not None else nn.Parameter(torch.randn(1))
        )
        self.alpha = alpha.to(self.device)
        self.alpha.requires_grad = True
        self.__name__ = "ISRU"

    def forward(self, x):
        return x / torch.sqrt(1 + self.alpha * x**2)


class ISRLU(nn.Module):
    def __init__(self, alpha=1.0, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if "device" in kwargs:
            self.device = kwargs["device"]
        else:
            self.device = torch.device("cpu")
        alpha = (
            torch.tensor(alpha) if alpha is not None else nn.Parameter(torch.randn(1))
        )
        self.alpha = alpha.to(self.device)
        self.alpha.requires_grad = True
        self.isru = ISRU(alpha)
        self.__name__ = "ISRLU"

    def forward(self, x):
        return torch.where(x >= 0, x, self.isru(x))


class PBessel(nn.Module):
    def __init__(self, alpha=None, beta=None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if "device" in kwargs:
            self.device = kwargs["device"]
        else:
            self.device = torch.device("cpu")
        alpha = (
            torch.tensor(alpha) if alpha is not None else nn.Parameter(torch.randn(1))
        )
        beta = torch.tensor(beta) if beta is not None else nn.Parameter(torch.randn(1))
        self.alpha = alpha.to(self.device)
        self.beta = beta.to(self.device)
        self.alpha.requires_grad = True
        self.beta.requires_grad = True
        self.__name__ = "PBessel"

    def forward(self, input):
        gamma = 1 - self.alpha
        return (self.alpha * torch.special.bessel_j0(self.beta * input)) + (
            gamma * torch.special.bessel_j1(self.beta * input)
        )


class LeakyPReQU(nn.Module):
    def __init__(self, alpha=None, beta=None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if "device" in kwargs:
            self.device = kwargs["device"]
        else:
            self.device = torch.device("cpu")
        alpha = (
            torch.tensor(alpha) if alpha is not None else nn.Parameter(torch.randn(1))
        )
        beta = torch.tensor(beta) if beta is not None else nn.Parameter(torch.randn(1))
        self.alpha = alpha.to(self.device)
        self.beta = beta.to(self.device)
        self.alpha.requires_grad = True
        self.beta.requires_grad = True
        self.__name__ = "LeakyPReQU"

    def forward(self, input):
        return torch.where(
            input > 0,
            (self.alpha * input * input) + (self.beta * input),
            self.beta * input,
        )


class Sinusoid(nn.Module):
    def __init__(self, alpha=None, beta=None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if "device" in kwargs:
            self.device = kwargs["device"]
        else:
            self.device = torch.device("cpu")
        alpha = (
            torch.tensor(alpha) if alpha is not None else nn.Parameter(torch.randn(1))
        )
        beta = torch.tensor(beta) if beta is not None else nn.Parameter(torch.randn(1))
        self.alpha = alpha.to(self.device)
        self.beta = beta.to(self.device)
        self.alpha.requires_grad = True
        self.beta.requires_grad = True
        self.__name__ = "Sinusoid"

    def forward(self, input):
        return torch.sin(self.alpha * (input + self.beta))


class Modulo(nn.Module):
    def __init__(self, alpha=None, beta=None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if "device" in kwargs:
            self.device = kwargs["device"]
        else:
            self.device = torch.device("cpu")
        alpha = (
            torch.tensor(alpha) if alpha is not None else nn.Parameter(torch.randn(1))
        )
        beta = torch.tensor(beta) if beta is not None else nn.Parameter(torch.randn(1))
        self.alpha = alpha.to(self.device)
        self.beta = beta.to(self.device)
        self.alpha.requires_grad = True
        self.beta.requires_grad = True
        self.__name__ = "Modulo"

    def forward(self, input):
        return torch.fmod(self.alpha * input, self.beta)


class TriWave(nn.Module):
    def __init__(self, alpha=None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if "device" in kwargs:
            self.device = kwargs["device"]
        else:
            self.device = torch.device("cpu")
        alpha = (
            torch.tensor(alpha) if alpha is not None else nn.Parameter(torch.randn(1))
        )
        self.alpha = alpha.to(self.device)
        self.alpha.requires_grad = True
        self.__name__ = "TriWave"

    def forward(self, input):
        return torch.abs(
            2 * (input / self.alpha - torch.floor(input / self.alpha + 0.5))
        )


class Gaussian(nn.Module):
    def __init__(self, alpha=None, beta=None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if "device" in kwargs:
            self.device = kwargs["device"]
        else:
            self.device = torch.device("cpu")
        alpha = (
            torch.tensor(alpha) if alpha is not None else nn.Parameter(torch.randn(1))
        )
        beta = torch.tensor(beta) if beta is not None else nn.Parameter(torch.randn(1))
        self.alpha = alpha.to(self.device)
        self.beta = beta.to(self.device)
        self.alpha.requires_grad = True
        self.beta.requires_grad = True
        self.__name__ = "Gaussian"

    def forward(self, x):
        return torch.exp(-(((x - self.alpha) ** 2) / (2 * self.beta**2)))
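

# A minimal smoke test (an illustrative addition, not part of the training
# pipeline): every activation should map a tensor elementwise, and
# TrainableHybrid should blend two activations into an output of the same shape.
if __name__ == "__main__":
    x = torch.randn(8)
    for act in (
        SoftExp(), BMU(), ISRU(), ISRLU(), PBessel(),
        LeakyPReQU(), Sinusoid(), Modulo(), TriWave(), Gaussian(),
    ):
        assert act(x).shape == x.shape
    hybrid = TrainableHybrid([BMU, ISRU])
    assert hybrid(x).shape == x.shape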
1012
data_postprocessing.ipynb
Normal file
File diff suppressed because it is too large
1421
expt1.ipynb
Normal file
File diff suppressed because it is too large
547
expt1.py
Normal file
@@ -0,0 +1,547 @@
# %% [markdown]
# <h1>Experiment 1</h1>
# <h3>Initial hyperparameter tuning</h3>
# <p>Summary</p>
# <ul>
# <li>A model was created with a dynamic constructor, allowing for a hyperparameter-driven model</li>
# <li>Hyperparameters were tuned using <code>Optuna</code></li>
# <li>The training loop was constructed using <code>PyTorchLightning</code></li>
# <li>The model was trained on a cluster of machines using a shared SQL trial database</li>
# <li>An extremely aggressive pruning algorithm was used to quickly narrow in on an optimal hyperparameter space</li>
# <li>Experiment 1 was left to train on the cluster for 2 days</li>
# </ul>

# %%
# Data handling imports
from dask.distributed import Client, LocalCluster
import dask
import dask.dataframe as dd
import dask.array as da
import numpy as np
import pickle
import random
from itertools import chain
from tqdm.auto import tqdm

# Deep learning imports
import torch
from torch.utils.data import DataLoader
from torch import nn
from torch.nn import functional as F
from torch import optim
import pytorch_lightning as pl
from torchmetrics import MeanSquaredError
from pytorch_lightning import Trainer
import optuna
from optuna.pruners import HyperbandPruner
from optuna.integration import PyTorchLightningPruningCallback


# Suppress some warning messages from pytorch_lightning;
# it really doesn't like that I've forced it to handle a dask array!
import warnings

warnings.filterwarnings("ignore", category=UserWarning, module=pl.__name__)

# Also, set up a log to record debug messages for failed trials
import logging

logging.basicConfig(filename="debug.log", encoding="utf-8", level=logging.ERROR)
# %% [markdown]
# <h3>Patching PyTorchLightning</h3>
# <p>
# A key part of this project was to develop a patch for PyTorchLightning to allow for the use of <code>dask</code> arrays as inputs. It was important that PyTorchLightning accept <code>dask</code> arrays and only load the data into memory when needed. Otherwise, our extremely large datasets would simply crash our system, as they are significantly larger than the available RAM and VRAM.
# </p><p>
# After several versions of the patch, this final version was developed. It is a simple monkey patch that wraps the <code>pytorch_lightning.utilities.data._extract_batch_size</code> generator with a check that mimics the expected behaviour for torch tensors when given a dask array, and extends its type signature to ensure static analysis is still possible.
# </p><p>
# With this patch applied, the forward method in our model can accept a dask array and only compute each chunk of the array when needed. This allows us to train our model on datasets that are significantly larger than the available memory.
# </p>
# %%
# Monkey patch to allow pytorch lightning to accept a dask array as a model input
from typing import Any, Generator, Iterable, Mapping, Optional, Union

BType = Union[da.Array, torch.Tensor, str, Mapping[Any, "BType"], Iterable["BType"]]

unpatched = pl.utilities.data._extract_batch_size


def patch(batch: BType) -> Generator[Optional[int], None, None]:
    if isinstance(batch, da.core.Array):
        # Mimic the behaviour expected for torch tensors: a 0-d array is a
        # single sample, otherwise the first axis is the batch dimension
        if len(batch.shape) == 0:
            yield 1
        else:
            yield batch.shape[0]
    else:
        yield from unpatched(batch)


pl.utilities.data._extract_batch_size = patch
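# %% [markdown]
# <p>
# As a quick sanity check (an illustrative addition, not part of the original
# training script), the patched generator should report the first axis of a
# dask array as its batch size:
# </p>

# %%
if __name__ == "__main__":
    assert next(patch(da.zeros((8, 4)))) == 8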
# %%
# Set the device to use with torch
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


# Prepare a dask cluster and client
def create_client():
    cluster = LocalCluster(n_workers=2, threads_per_worker=1)
    client = Client(cluster)
    return client


if __name__ == "__main__":
    client = create_client()

# %%
# Load X and y for training
samples = list(range(1, 82))

with open("sample_X.pkl", "rb") as f:
    X = pickle.load(f)

with open("sample_y.pkl", "rb") as f:
    y = pickle.load(f)
# %% [markdown]
# <h3>Dataset Splitting</h3>
# <p>The dataset is split into training and validation sets (an 80:20 split). Because the number of available samples is extremely small, we haven't produced a test dataset. In the future, as more data is obtained, a test set should be included whenever possible.</p>

# %%
# Separate samples into training and validation sets
val_samples = random.sample(samples, k=len(samples) // 5)
train_samples = [s for s in samples if s not in val_samples]

X_train = {i: X[i] for i in train_samples}
X_val = {i: X[i] for i in val_samples}
y_train = {i: y[i] for i in train_samples}
y_val = {i: y[i] for i in val_samples}
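# %% [markdown]
# <p>
# For the 81 samples used here, that works out to 16 validation samples and 65
# training samples (a quick arithmetic check, added for illustration):
# </p>

# %%
if __name__ == "__main__":
    assert len(val_samples) == 16 and len(train_samples) == 65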
# %% [markdown]
# <h3>Dataset Collation</h3>
# <p>This function collates our data for a torch DataLoader. The use of a DataLoader will allow us to shuffle and prefetch data, reducing overfitting and maximising performance, as IO will be a bottleneck. A dynamically constructed version of this function would let us select the outputs we train against; however, for this experiment we will match against all outputs for simplicity.</p>

# %%
# Collate a single-sample batch into device tensors (plus a dask array)
def collate_fn(batch):
    X0 = batch[0][0][0].to_numpy(dtype=np.float32)[0]
    X1 = batch[0][0][1].to_dask_array(lengths=True)
    y = batch[0][1].to_numpy(dtype=np.float32)
    return (
        torch.from_numpy(X0).to(device),
        X1,
        torch.from_numpy(y).to(device),
    )
# %% [markdown]
# <h3>Convolutional Data Compression</h3>
# <p>
# The <code>DaskCompression</code> module accepts a dask array and applies a convolutional kernel to it to significantly compress the input data. This allows us to transform a larger-than-VRAM dataset into one that can fit on our GPU, and (hopefully) retain the information relevant to training the rest of our model.
# </p><p>
# Note how each kernel is only computed inside <code>compress_kernel</code> and is immediately compressed via convolution. This ensures that only one kernel needs to be stored in memory at a time, avoiding the need to hold the entire dataset in memory at once.
# </p>
# %%
class DaskCompression(nn.Module):
    def __init__(
        self, in_channels, out_channels, kernel_size, chunk_size=1, device=device
    ):
        super().__init__()
        self.kernel_size = kernel_size
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.chunk_size = chunk_size
        self.device = device
        self.conv = nn.Conv1d(in_channels, out_channels, kernel_size).to(device)

    def compress_kernel(self, kernel):
        return (
            self.conv(torch.from_numpy(kernel.compute()).to(self.device))
            .squeeze()
            .to("cpu")  # return to cpu to save VRAM
        )

    def forward(self, x):
        # Precompute the dimensions of the output array
        dim0, dim2 = x.shape
        assert dim2 == self.in_channels
        dim0 = (dim0 // self.kernel_size) // self.chunk_size
        x = x.reshape(dim0, self.chunk_size, self.kernel_size, dim2)
        x = da.transpose(x, axes=(0, 1, 3, 2))

        x = [self.compress_kernel(kernel) for kernel in x]
        return torch.stack(x).to(self.device)
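# %% [markdown]
# <p>
# A minimal shape walkthrough with toy sizes (illustrative only): 24 rows of 5
# channels with a kernel size of 4 and a chunk size of 2 give
# <code>(24 // 4) // 2 = 3</code> chunks, so the output stacks to
# <code>(3, 2, 5)</code>.
# </p>

# %%
if __name__ == "__main__":
    _demo = DaskCompression(5, 5, kernel_size=4, chunk_size=2, device=torch.device("cpu"))
    _demo_x = da.random.random((24, 5)).astype(np.float32)
    assert tuple(_demo(_demo_x).shape) == (3, 2, 5)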
# %% [markdown]
# <h3>Model Design</h3>
# <p>
# The model was designed as a dynamically constructed, hyperparameter-driven model for ease of hyperparameter optimisation. The constructed model processes data in the following way:
# </p>
# <ol>
# <li>The input is left/right padded to a multiple of the compressor kernel size</li>
# <li>The dask array is compressed by a <code>DaskCompression</code> layer, treating each input as a channel</li>
# <li>The compressed array is then recursively convolved down to a size less than or equal to the width of our feedforward network</li>
# <li>The channels of the now convolved data are combined</li>
# <li>The combined, flattened data is then left/right padded to the width of the feedforward network</li>
# <li>Finally, the data is fed into a feedforward network</li>
# </ol>
# <p>
# This relatively simple design allows the network to accept both larger-than-RAM datasets and datasets of variable sizes as inputs. This makes it suitable for training on whole Aconity datasets, without the need for culling or binning.
# </p>
# %%
class Model(pl.LightningModule):
    def __init__(
        self,
        # pl attributes
        optimizer=torch.optim.Adam,
        optimizer_args=(),
        optimizer_kwargs={},
        scheduler=None,
        scheduler_kwargs={},
        loss=torch.nn.MSELoss(),
        train_ds=None,
        val_ds=None,
        # model args & kwargs
        compressor_kernel_size=128,
        compressor_chunk_size=128,
        compressor_act=(nn.ReLU, (), {}),
        conv_kernel_size=128,
        conv_norm=False,
        conv_act=(nn.ReLU, (), {}),
        channel_combine_act=(nn.ReLU, (), {}),
        param_ff_depth=4,
        param_ff_width=16,
        param_ff_act=(nn.ReLU, (), {}),
        ff_width=512,
        ff_depth=4,
        ff_act=(nn.ReLU, (), {}),
        out_size=6,
        out_act=(nn.ReLU, (), {}),
    ):
        super().__init__()
        # Assign necessary attributes for pl model
        self.optimizer = optimizer
        self.optimizer_args = optimizer_args
        self.optimizer_kwargs = optimizer_kwargs
        self.scheduler = scheduler
        self.scheduler_kwargs = scheduler_kwargs
        self.loss = loss
        self.mse = MeanSquaredError()
        self.train_ds = train_ds
        self.val_ds = val_ds
        # Attrs for dynamically created model to be tested
        self.compressor_kernel_size = compressor_kernel_size
        self.compressor_chunk_size = compressor_chunk_size
        self.conv_kernel_size = conv_kernel_size
        self.ff_width = ff_width
        self.ff_depth = ff_depth
        self.out_size = out_size
        # layers
        # compressor compresses and converts dask array to torch tensor
        self.convolutional_compressor = DaskCompression(
            5,
            5,
            kernel_size=compressor_kernel_size,
            chunk_size=compressor_chunk_size,
        )
        self.compressor_act = compressor_act[0](*compressor_act[1], **compressor_act[2])
        # convolutional layer recursively applies convolutions to the compressed input
        self.conv = nn.Conv1d(5, 5, kernel_size=conv_kernel_size)
        self.conv_norm = nn.LocalResponseNorm(5) if conv_norm else nn.Identity()
        self.conv_act = conv_act[0](*conv_act[1], **conv_act[2])
        self.combine_channels = nn.Conv1d(5, 1, kernel_size=1)
        self.channel_combine_act = channel_combine_act[0](
            *channel_combine_act[1], **channel_combine_act[2]
        )
        self.param_ff = nn.Sequential(
            nn.Linear(4, param_ff_width),
            param_ff_act[0](*param_ff_act[1], **param_ff_act[2]),
            *chain(
                *(
                    (
                        nn.Linear(param_ff_width, param_ff_width),
                        param_ff_act[0](*param_ff_act[1], **param_ff_act[2]),
                    )
                    for _ in range(param_ff_depth)
                )
            ),
        )
        self.ff = nn.Sequential(
            nn.Linear(ff_width + param_ff_width, ff_width),
            ff_act[0](*ff_act[1], **ff_act[2]),
            *chain(
                *(
                    (
                        nn.Linear(ff_width, ff_width),
                        ff_act[0](*ff_act[1], **ff_act[2]),
                    )
                    for _ in range(ff_depth)
                )
            ),
        )
        self.out_dense = nn.Linear(ff_width, out_size)
        self.out_act = out_act[0](*out_act[1], **out_act[2])

    @staticmethod
    def pad_ax0_to_multiple_of(x, multiple_of):
        padding = (((x.shape[0] // multiple_of) + 1) * multiple_of) - x.shape[0]
        left_pad = padding // 2
        right_pad = padding - left_pad
        return da.pad(
            x, ((left_pad, right_pad), (0, 0)), mode="constant", constant_values=0
        )

    def pad_to_ff_width(self, x):
        padding = self.ff_width - x.shape[1]
        left_pad = padding // 2
        right_pad = padding - left_pad
        return F.pad(
            x,
            (right_pad, left_pad, 0, 0),
            mode="constant",
            value=0.0,
        )

    def forward(self, x0, x1):
        # pad to a multiple of kernel_size * chunk_size
        x1 = self.pad_ax0_to_multiple_of(
            x1, self.compressor_kernel_size * self.compressor_chunk_size
        )
        x1 = self.convolutional_compressor(x1)
        x1 = x1.reshape(x1.shape[0] * x1.shape[1], x1.shape[2]).T.unsqueeze(0)
        while x1.shape[2] > self.ff_width:
            x1 = self.conv(x1)
            x1 = self.conv_norm(x1)
            x1 = self.conv_act(x1)
        x1 = self.combine_channels(x1)
        x1 = self.channel_combine_act(x1)
        x1 = x1.squeeze(1)
        x1 = self.pad_to_ff_width(x1)
        x0 = x0.unsqueeze(0)
        x0 = self.param_ff(x0)
        x = torch.cat((x1, x0), dim=1)
        x = self.ff(x)
        x = self.out_dense(x)
        x = self.out_act(x)
        return x

    def configure_optimizers(self):
        optimizer = self.optimizer(
            self.parameters(), *self.optimizer_args, **self.optimizer_kwargs
        )
        if self.scheduler is not None:
            scheduler = self.scheduler(optimizer, **self.scheduler_kwargs)
            return [optimizer], [scheduler]
        else:
            return optimizer

    def train_dataloader(self):
        return self.train_ds

    def val_dataloader(self):
        return self.val_ds

    def training_step(self, batch, batch_idx):
        x0, x1, y = batch
        y_hat = self(x0, x1)
        loss = self.loss(y_hat, y)
        self.log("train_loss", loss)
        mse = self.mse(y_hat, y)
        self.log("train_MSE", mse, on_step=True, on_epoch=True, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        x0, x1, y = batch
        y_hat = self(x0, x1)
        loss = self.loss(y_hat, y)
        self.log("val_loss", loss)
        mse = self.mse(y_hat, y)
        self.log("val_MSE", mse, on_step=True, on_epoch=True, prog_bar=True)
# %% [markdown]
# <h3>Activation Functions</h3>
# <p>
# For our hyperparameter optimisation, we intend to test all the activation functions in PyTorch. In addition to the builtin activations, we will also train using the following custom implemented activation functions from the literature or of our own design (a quick smoke test of the dispatcher follows the next cell):
# </p>
# <ol>
# <li><b><code>BMU</code>:</b> Bio-Mimicking Unit; an activation function designed to mimic the activation potential of a biological neuron.</li>
# <li><b><code>SoftExp</code>:</b> Soft Exponential function; a parametric activation function that fits a wide variety of exponential curves (DOI: <a href=https://arxiv.org/abs/1602.01321v1>10.48550/arXiv.1602.01321</a>)</li>
# <li><b><code>LeakyPReQU</code>:</b> Leaky Parametric Rectified Quadratic Unit; a continuously differentiable function that is a parametrically sloped line for <code>x ≤ 0</code> and a quadratic curve for <code>x > 0</code></li>
# <li><b><code>ISRU</code>:</b> Inverse Square Root Unit; a somewhat uncommon function that can be useful in models such as this, as it yields a continuously differentiable curve while being extremely fast to compute using bit manipulation</li>
# <li><b><code>ISRLU</code>:</b> Inverse Square Root Linear Unit; a modified ISRU that is an ISRU for <code>x < 0</code> and <code>f(x) = x</code> for <code>x ≥ 0</code> (DOI: <a href=https://arxiv.org/abs/1710.09967>10.48550/arXiv.1710.09967</a>)</li>
# <li><b><code>PBessel</code>:</b> Parametric Bessel; a parametric Bessel curve yielding various different wave formations depending on its trainable parameters</li>
# <li><b><code>Sinusoid</code>:</b> A parametric sine wave, with frequency and phase as trainable parameters</li>
# <li><b><code>Modulo</code>:</b> A parametric sawtooth wave, <code>f(x) = (αx) % β</code>, where α and β are trainable parameters</li>
# <li><b><code>TriWave</code>:</b> A parametric triangle wave, with wavelength as a trainable parameter</li>
# <li><b><code>Gaussian</code>:</b> A parametric Gaussian curve, with trainable centre and width</li>
# </ol>
# %%
# Create a dispatcher mapping activation names to classes, mixing builtin
# activations with custom activations from experimentation or literature
from custom_activations import SoftExp, PBessel


activation_dispatcher = {
    "Tanh": nn.Tanh,
    "SiLU": nn.SiLU,
    "Softplus": nn.Softplus,
    "SoftExp": SoftExp,
    "PBessel": PBessel,
}
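# %% [markdown]
# <p>
# The smoke test mentioned above (an illustrative addition, not part of the
# search itself): every dispatcher entry should construct with no arguments and
# map a tensor elementwise without changing its shape.
# </p>

# %%
if __name__ == "__main__":
    _demo_x = torch.randn(16)
    for _name, _act in activation_dispatcher.items():
        assert _act()(_demo_x).shape == _demo_x.shape, _name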
# %% [markdown]
# <h3>Hyperparameter training</h3>
# <p>Here, we define an objective function describing what we want Optuna to do during each trial and how to react to the various errors and/or situations that may arise. To summarise the objective:</p>
# <ul>
# <li>Optuna selects hyperparameters for all input parameters within the given constraints</li>
# <li>A model is generated using the selected hyperparameters</li>
# <li>PyTorchLightning trains the model for 2 epochs</li>
# <li>The model is evaluated on the validation set</li>
# <li>The validation loss is returned to Optuna</li>
# </ul>
# <p>
# Optuna monitors the reported validation loss and attempts to minimise it. An extremely aggressive pruning strategy known as "hyperband pruning" is used to efficiently narrow the parameter space down to something more reasonable. Any trial Optuna deems suboptimal is pruned and stopped early to save time.
# </p>
# %%
# Test parameters
n_epochs = 2
output_keys = list(next(iter(y_train.values())).keys())
activation_vals = list(activation_dispatcher.keys())


# Next we define the objective function for the hyperparameter optimization
def objective(trial):
    torch.cuda.empty_cache()
    objective_value = torch.inf
    model = None
    logger = None
    try:
        # Select hyperparameters for testing
        compressor_kernel_size = 128
        compressor_chunk_size = 128
        compressor_act = (
            activation_dispatcher[
                trial.suggest_categorical("compressor_act", activation_vals)
            ],
            (),
            {},
        )
        conv_kernel_size = 128
        conv_norm = trial.suggest_categorical("conv_norm", [True, False])
        conv_act = (
            activation_dispatcher[
                trial.suggest_categorical("conv_act", activation_vals)
            ],
            (),
            {},
        )
        channel_combine_act = (
            activation_dispatcher[
                trial.suggest_categorical("channel_combine_act", activation_vals)
            ],
            (),
            {},
        )
        param_ff_depth = trial.suggest_int("param_ff_depth", 2, 8, 2)
        param_ff_width = trial.suggest_int("param_ff_width", 16, 64, 16)
        param_ff_act = (
            activation_dispatcher[
                trial.suggest_categorical("param_ff_act", activation_vals)
            ],
            (),
            {},
        )
        ff_width = trial.suggest_int("ff_width", 256, 1025, 256)
        ff_depth = trial.suggest_int("ff_depth", 2, 8, 2)
        ff_act = (
            activation_dispatcher[trial.suggest_categorical("ff_act", activation_vals)],
            (),
            {},
        )
        out_size = 2
        out_act = (nn.Sigmoid, tuple(), dict())

        # Set up the model architecture and other necessary components
        model = Model(
            compressor_kernel_size=compressor_kernel_size,
            compressor_chunk_size=compressor_chunk_size,
            compressor_act=compressor_act,
            conv_kernel_size=conv_kernel_size,
            conv_act=conv_act,
            conv_norm=conv_norm,
            channel_combine_act=channel_combine_act,
            param_ff_depth=param_ff_depth,
            param_ff_width=param_ff_width,
            param_ff_act=param_ff_act,
            ff_width=ff_width,
            ff_depth=ff_depth,
            ff_act=ff_act,
            out_size=out_size,
            out_act=out_act,
        ).to(device)

        trainer = Trainer(
            accelerator="gpu",
            max_epochs=n_epochs,
            devices=1,
            logger=logger,
            num_sanity_val_steps=0,  # Needs to be disabled or else we get an error because X is a dask array
            # precision="16-mixed",
            callbacks=[
                PyTorchLightningPruningCallback(trial, monitor="val_loss"),
            ],
        )
        # Prepare datasets
        train = DataLoader(
            list(zip(X_train.values(), y_train.values())),
            collate_fn=collate_fn,
            shuffle=True,
        )
        valid = DataLoader(
            list(zip(X_val.values(), y_val.values())),
            shuffle=True,
            collate_fn=collate_fn,
        )
        # Finally, train the model
        trainer.fit(model, train, valid)
        # Read the final validation loss back out of the trainer so it can be
        # reported to Optuna
        objective_value = trainer.callback_metrics["val_loss"].item()
    except Exception as e:
        logging.exception(f"An exception occurred in trial {trial.number}: {e}")
        raise optuna.exceptions.TrialPruned()
    finally:
        if logger is not None:
            logger.experiment.unwatch(model)
            logger.experiment.finish()
        del model
        torch.cuda.empty_cache()
        if objective_value == torch.inf:
            raise optuna.exceptions.TrialPruned()
    return objective_value
# %% [markdown]
# <h3>Hyperparameter Optimisation on a Computing Cluster</h3>
# <p>
# The final important step is to run the optimisation on a cluster of computers to maximise the number of trials that can run in parallel. Although this could be achieved with a more complex, scheduler-controlled system and dask, we will use the far simpler approach of a shared SQL ledger to keep track of the trials and their results. This is a very simple approach, but it is sufficient for our purposes and easy to implement (a sketch of the worker-side entry point follows the cell below). Using this approach, the model was trained on a cluster of 5 computers at once.
# </p>
# %%
if __name__ == "__main__":
    # storage_name = "sqlite:///optuna.sql"
    storage_name = "mysql+pymysql://root:Ch31121992@192.168.1.10:3306/optuna_db"
    study_name = "Composition Experiment 1"
    study = optuna.create_study(
        study_name=study_name,
        storage=storage_name,
        direction="minimize",
        pruner=HyperbandPruner(),
        load_if_exists=True,
    )
    study.optimize(
        objective,
        n_trials=None,
        timeout=None,
    )
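# %% [markdown]
# <p>
# The worker-side sketch mentioned above (a hypothetical helper, not part of
# the original script): each additional machine in the cluster attaches to the
# shared study by name and runs the same objective against the shared storage.
# </p>

# %%
def run_worker(storage, name):
    worker_study = optuna.load_study(study_name=name, storage=storage)
    worker_study.optimize(objective)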
133
expt1_analysis.ipynb
Normal file
@@ -0,0 +1,133 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Data handling imports\n",
    "import numpy as np\n",
    "import pickle\n",
    "import random\n",
    "from tqdm.auto import tqdm\n",
    "import optuna"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "storage_name = \"mysql+pymysql://root:Ch31121992@192.168.1.10:3306/optuna_db\"\n",
    "study_name = \"Experiment 1\"\n",
    "study = optuna.load_study(\n",
    "    study_name=study_name,\n",
    "    storage=storage_name,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = study.trials_dataframe()\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.dropna(inplace=True)\n",
    "df.sort_values(by=\"value\", inplace=True)\n",
    "df.drop(df[\"value\"].idxmax(), inplace=True)\n",
    "df.drop(df[\"value\"].idxmax(), inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "pd.options.plotting.backend = \"plotly\"\n",
    "params = list(df.keys()[5:-1])\n",
    "for p in params:\n",
    "    df.plot(x=p, y=\"value\", kind=\"scatter\", title=p)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "params = list(df.keys()[5:-1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!poetry add tabulate\n",
    "from tabulate import tabulate\n",
    "print(\n",
    "    tabulate(\n",
    "        (x[0] for x in sorted(list(df.groupby(params)), key=lambda x: x[1][\"value\"].mean())),\n",
    "        headers=params,\n",
    "        tablefmt=\"grid\",\n",
    "    )\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for p in params:\n",
    "    df.plot(x=p, y=\"value\", kind=\"scatter\", title=p).show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.3"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
366
expt2.ipynb
Normal file
@@ -0,0 +1,366 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<h1>Experiment 2</h1>\n",
|
||||
"<h3>Targetted hyperparameter tuning</h3>\n",
|
||||
"<p>\n",
|
||||
"By examining the results of expt1, a smaller range of hyperparameters for expt2 was chosen. This allowed for a more targetted search of the hyperparameter space to find an optimal configuration. The selected parameters for expt2 were as follows:\n",
|
||||
"</p>\n",
|
||||
"<ul>\n",
|
||||
"<li>in_act = Linear, Mish, PBessel, or Tanhshrink</li>\n",
|
||||
"<li>compressor_kernel_size = 128</li>\n",
|
||||
"<li>compressor_act = Softshrink, SoftExp, or PReLU</li>\n",
|
||||
"<li>conv_kernel_size = 128</li>\n",
|
||||
"<li>conv_act = Sigmoid or PBessel</li>\n",
|
||||
"<li>channel_combine_act = HardSigmoid or GELU</li>\n",
|
||||
"<li>ff_width = 512</li>\n",
|
||||
"<li>ff_depth = 2, 4, or 6</li>\n",
|
||||
"<li>ff_act = CELU</li>\n",
|
||||
"<li>out_act = Tanhshrink or Mish</li>\n",
|
||||
"</ul>\n",
|
||||
"<p>\n",
|
||||
"Several of the parameters were able to be fixed to a specific value, and the remaining parameters (with the exception of <code>`in_act`</code>) were reduced to only 2 or 3 possible values, dramatically shrinking the parameter space. For this reason, a significantly less aggressive pruning algorithm was used, allowing for a more thorough search of the parameter space.\n",
|
||||
"</p>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Data handling imports\n",
|
||||
"from dask.distributed import Client, LocalCluster\n",
|
||||
"import dask\n",
|
||||
"import dask.dataframe as dd\n",
|
||||
"import dask.array as da\n",
|
||||
"import numpy as np\n",
|
||||
"import pickle\n",
|
||||
"import random\n",
|
||||
"from itertools import chain\n",
|
||||
"from tqdm.auto import tqdm\n",
|
||||
"\n",
|
||||
"# Deep learning imports\n",
|
||||
"import torch\n",
|
||||
"from torch.utils.data import DataLoader\n",
|
||||
"from torch import nn\n",
|
||||
"from torch.nn import functional as F\n",
|
||||
"from torch import optim\n",
|
||||
"import pytorch_lightning as pl\n",
|
||||
"from pytorch_lightning import Trainer\n",
|
||||
"import optuna\n",
|
||||
"from optuna.pruners import HyperbandPruner\n",
|
||||
"from optuna.integration import PyTorchLightningPruningCallback\n",
|
||||
"\n",
|
||||
"# Suppress some warning messages from pytorch_lightning,\n",
|
||||
"# It really doesn't like that i've forced it to handle a dask array!\n",
|
||||
"import warnings\n",
|
||||
"\n",
|
||||
"warnings.filterwarnings(\"ignore\", category=UserWarning, module=pl.__name__)\n",
|
||||
"\n",
|
||||
"# Also, set up a log to record debug messages for failed trials\n",
|
||||
"import logging\n",
|
||||
"\n",
|
||||
"logging.basicConfig(filename=\"debug_test.log\", encoding=\"utf-8\", level=logging.DEBUG)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from expt1 import (\n",
|
||||
" Model,\n",
|
||||
" Linear,\n",
|
||||
" device,\n",
|
||||
" activation_dispatcher,\n",
|
||||
" X_train,\n",
|
||||
" y_train,\n",
|
||||
" X_val,\n",
|
||||
" y_val,\n",
|
||||
" create_collate_fn,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"cluster = LocalCluster(n_workers=8, threads_per_worker=1)\n",
|
||||
"client = Client(cluster)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Monkey patch to allow pytorch lightning to accept a dask array as a model input\n",
|
||||
"from typing import Any, Generator, Iterable, Mapping, Optional, Union\n",
|
||||
"\n",
|
||||
"BType = Union[da.Array, torch.Tensor, str, Mapping[Any, \"BType\"], Iterable[\"BType\"]]\n",
|
||||
"\n",
|
||||
"unpatched = pl.utilities.data._extract_batch_size\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def patch(batch: BType) -> Generator[Optional[int], None, None]:\n",
|
||||
" if isinstance(batch, da.core.Array):\n",
|
||||
" if len(batch.shape) == 0:\n",
|
||||
" yield 1\n",
|
||||
" else:\n",
|
||||
" yield batch.shape[0]\n",
|
||||
" else:\n",
|
||||
" yield from unpatched(batch)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"pl.utilities.data._extract_batch_size = patch"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Test parameters\n",
|
||||
"n_epochs = 10\n",
|
||||
"output_keys = list(next(iter(y_train.values())).keys())\n",
|
||||
"activation_vals = list(activation_dispatcher.keys())\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Next we define the objective function for the hyperparameter optimization\n",
|
||||
"def objective(trial):\n",
|
||||
" torch.cuda.empty_cache()\n",
|
||||
" objective_value = torch.inf\n",
|
||||
" model = None\n",
|
||||
" logger = None\n",
|
||||
" try:\n",
|
||||
" # Select hyperparameters for testing\n",
|
||||
" in_act = (\n",
|
||||
" activation_dispatcher[trial.suggest_categorical(\"in_act\", activation_vals)],\n",
|
||||
" (),\n",
|
||||
" {},\n",
|
||||
" )\n",
|
||||
" compressor_kernel_size = trial.suggest_int(\n",
|
||||
" \"compressor_kernel_size\", 64, 257, 64\n",
|
||||
" )\n",
|
||||
" compressor_chunk_size = 128\n",
|
||||
" compressor_act = (\n",
|
||||
" activation_dispatcher[\n",
|
||||
" trial.suggest_categorical(\"compressor_act\", activation_vals)\n",
|
||||
" ],\n",
|
||||
" (),\n",
|
||||
" {},\n",
|
||||
" )\n",
|
||||
" conv_kernel_size = trial.suggest_int(\"conv_kernel_size\", 64, 257, 64)\n",
|
||||
" conv_act = (\n",
|
||||
" activation_dispatcher[\n",
|
||||
" trial.suggest_categorical(\"conv_act\", activation_vals)\n",
|
||||
" ],\n",
|
||||
" (),\n",
|
||||
" {},\n",
|
||||
" )\n",
|
||||
" channel_combine_act = (\n",
|
||||
" activation_dispatcher[\n",
|
||||
" trial.suggest_categorical(\"channel_combine_act\", activation_vals)\n",
|
||||
" ],\n",
|
||||
" (),\n",
|
||||
" {},\n",
|
||||
" )\n",
|
||||
" ff_width = trial.suggest_int(\"ff_width\", 256, 1025, 256)\n",
|
||||
" ff_depth = trial.suggest_int(\"ff_depth\", 2, 8, 2)\n",
|
||||
" ff_act = (\n",
|
||||
" activation_dispatcher[trial.suggest_categorical(\"ff_act\", activation_vals)],\n",
|
||||
" (),\n",
|
||||
" {},\n",
|
||||
" )\n",
|
||||
" out_size = len(output_keys)\n",
|
||||
" out_act = (\n",
|
||||
" activation_dispatcher[\n",
|
||||
" trial.suggest_categorical(\"out_act\", activation_vals)\n",
|
||||
" ],\n",
|
||||
" (),\n",
|
||||
" {},\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # Set up the model architecture and other necessary components\n",
|
||||
" model = Model(\n",
|
||||
" in_act=in_act,\n",
|
||||
" compressor_kernel_size=compressor_kernel_size,\n",
|
||||
" compressor_chunk_size=compressor_chunk_size,\n",
|
||||
" compressor_act=compressor_act,\n",
|
||||
" conv_kernel_size=conv_kernel_size,\n",
|
||||
" conv_act=conv_act,\n",
|
||||
" channel_combine_act=channel_combine_act,\n",
|
||||
" ff_width=ff_width,\n",
|
||||
" ff_depth=ff_depth,\n",
|
||||
" ff_act=ff_act,\n",
|
||||
" out_size=out_size,\n",
|
||||
" out_act=out_act,\n",
|
||||
" ).to(device)\n",
|
||||
"\n",
|
||||
" trainer = Trainer(\n",
|
||||
" accelerator=\"gpu\",\n",
|
||||
" max_epochs=n_epochs,\n",
|
||||
" devices=1,\n",
|
||||
" logger=logger,\n",
|
||||
" num_sanity_val_steps=0, # Needs to be disabled or else we get an error because X is dask array\n",
|
||||
" # precision=\"16-mixed\",\n",
|
||||
" callbacks=[\n",
|
||||
" PyTorchLightningPruningCallback(trial, monitor=\"val_loss\"),\n",
|
||||
" ],\n",
|
||||
" )\n",
|
||||
" # Prepare datasets\n",
|
||||
" train = DataLoader(\n",
|
||||
" list(zip(X_train.values(), y_train.values())),\n",
|
||||
" collate_fn=create_collate_fn(),\n",
|
||||
" shuffle=True,\n",
|
||||
" )\n",
|
||||
" valid = DataLoader(\n",
|
||||
" list(zip(X_val.values(), y_val.values())),\n",
|
||||
" shuffle=True,\n",
|
||||
" collate_fn=create_collate_fn(),\n",
|
||||
" )\n",
|
||||
" # Finally, train the model\n",
|
||||
" trainer.fit(model, train, valid)\n",
|
||||
" except torch.cuda.OutOfMemoryError as e:\n",
|
||||
" logging.warning(f\"Ran out of memory in trial {trial.number}!\")\n",
|
||||
" raise optuna.exceptions.TrialPruned()\n",
|
||||
" except Exception as e:\n",
|
||||
" logging.exception(f\"An exception occurred in trial {trial.number}: {e}\")\n",
|
||||
" raise optuna.exceptions.TrialPruned()\n",
|
||||
" finally:\n",
|
||||
" if logger is not None:\n",
|
||||
" logger.experiment.unwatch(model)\n",
|
||||
" logger.experiment.finish()\n",
|
||||
" del model\n",
|
||||
" torch.cuda.empty_cache()\n",
|
||||
" if objective_value == torch.inf:\n",
|
||||
" raise optuna.exceptions.TrialPruned()\n",
|
||||
" return objective_value"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[I 2023-07-31 23:49:15,744] Using an existing study with name 'Experiment 2' instead of creating a new one.\n",
|
||||
"[I 2023-07-31 23:49:16,553] Trial 221 pruned. \n",
|
||||
"[I 2023-07-31 23:49:16,928] Trial 222 pruned. \n",
|
||||
"[I 2023-07-31 23:49:17,318] Trial 223 pruned. \n",
|
||||
"[I 2023-07-31 23:49:17,682] Trial 224 pruned. \n",
|
||||
"[W 2023-07-31 23:49:18,028] Trial 225 failed with parameters: {} because of the following error: KeyboardInterrupt().\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/home/cianh/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/optuna/study/_optimize.py\", line 200, in _run_trial\n",
|
||||
" value_or_values = func(trial)\n",
|
||||
" ^^^^^^^^^^^\n",
|
||||
" File \"/tmp/ipykernel_562333/3392796582.py\", line 16, in objective\n",
|
||||
" activation_dispatcher[trial.suggest_categorical(\"in_act\", activation_vals)],\n",
|
||||
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
|
||||
" File \"/home/cianh/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/optuna/trial/_trial.py\", line 405, in suggest_categorical\n",
|
||||
" return self._suggest(name, CategoricalDistribution(choices=choices))\n",
|
||||
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
|
||||
" File \"/home/cianh/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/optuna/trial/_trial.py\", line 630, in _suggest\n",
|
||||
" param_value = self.study.sampler.sample_independent(\n",
|
||||
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
|
||||
" File \"/home/cianh/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/optuna/samplers/_tpe/sampler.py\", line 471, in sample_independent\n",
|
||||
" mpe_above = _ParzenEstimator(\n",
|
||||
" ^^^^^^^^^^^^^^^^^\n",
|
||||
" File \"/home/cianh/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/optuna/samplers/_tpe/parzen_estimator.py\", line 75, in __init__\n",
|
||||
" distributions=[\n",
|
||||
" ^\n",
|
||||
" File \"/home/cianh/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/optuna/samplers/_tpe/parzen_estimator.py\", line 76, in <listcomp>\n",
|
||||
" self._calculate_distributions(\n",
|
||||
" File \"/home/cianh/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/optuna/samplers/_tpe/parzen_estimator.py\", line 154, in _calculate_distributions\n",
|
||||
" return self._calculate_categorical_distributions(\n",
|
||||
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
|
||||
" File \"/home/cianh/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/optuna/samplers/_tpe/parzen_estimator.py\", line 192, in _calculate_categorical_distributions\n",
|
||||
" weights /= weights.sum(axis=1, keepdims=True)\n",
|
||||
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
|
||||
" File \"/home/cianh/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/numpy/core/_methods.py\", line 47, in _sum\n",
|
||||
" def _sum(a, axis=None, dtype=None, out=None, keepdims=False,\n",
|
||||
" \n",
|
||||
"KeyboardInterrupt\n",
|
||||
"[W 2023-07-31 23:49:18,040] Trial 225 failed with value None.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"ename": "KeyboardInterrupt",
|
||||
"evalue": "",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
|
||||
"Cell \u001b[0;32mIn[5], line 11\u001b[0m\n\u001b[1;32m 3\u001b[0m study_name \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mExperiment 2\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 4\u001b[0m study \u001b[39m=\u001b[39m optuna\u001b[39m.\u001b[39mcreate_study(\n\u001b[1;32m 5\u001b[0m study_name\u001b[39m=\u001b[39mstudy_name,\n\u001b[1;32m 6\u001b[0m storage\u001b[39m=\u001b[39mstorage_name,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 9\u001b[0m load_if_exists\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m,\n\u001b[1;32m 10\u001b[0m )\n\u001b[0;32m---> 11\u001b[0m study\u001b[39m.\u001b[39;49moptimize(\n\u001b[1;32m 12\u001b[0m objective,\n\u001b[1;32m 13\u001b[0m n_trials\u001b[39m=\u001b[39;49m\u001b[39mNone\u001b[39;49;00m,\n\u001b[1;32m 14\u001b[0m timeout\u001b[39m=\u001b[39;49m\u001b[39mNone\u001b[39;49;00m,\n\u001b[1;32m 15\u001b[0m )\n",
|
||||
"File \u001b[0;32m~/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/optuna/study/study.py:443\u001b[0m, in \u001b[0;36mStudy.optimize\u001b[0;34m(self, func, n_trials, timeout, n_jobs, catch, callbacks, gc_after_trial, show_progress_bar)\u001b[0m\n\u001b[1;32m 339\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39moptimize\u001b[39m(\n\u001b[1;32m 340\u001b[0m \u001b[39mself\u001b[39m,\n\u001b[1;32m 341\u001b[0m func: ObjectiveFuncType,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 348\u001b[0m show_progress_bar: \u001b[39mbool\u001b[39m \u001b[39m=\u001b[39m \u001b[39mFalse\u001b[39;00m,\n\u001b[1;32m 349\u001b[0m ) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 350\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"Optimize an objective function.\u001b[39;00m\n\u001b[1;32m 351\u001b[0m \n\u001b[1;32m 352\u001b[0m \u001b[39m Optimization is done by choosing a suitable set of hyperparameter values from a given\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 440\u001b[0m \u001b[39m If nested invocation of this method occurs.\u001b[39;00m\n\u001b[1;32m 441\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 443\u001b[0m _optimize(\n\u001b[1;32m 444\u001b[0m study\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m,\n\u001b[1;32m 445\u001b[0m func\u001b[39m=\u001b[39;49mfunc,\n\u001b[1;32m 446\u001b[0m n_trials\u001b[39m=\u001b[39;49mn_trials,\n\u001b[1;32m 447\u001b[0m timeout\u001b[39m=\u001b[39;49mtimeout,\n\u001b[1;32m 448\u001b[0m n_jobs\u001b[39m=\u001b[39;49mn_jobs,\n\u001b[1;32m 449\u001b[0m catch\u001b[39m=\u001b[39;49m\u001b[39mtuple\u001b[39;49m(catch) \u001b[39mif\u001b[39;49;00m \u001b[39misinstance\u001b[39;49m(catch, Iterable) \u001b[39melse\u001b[39;49;00m (catch,),\n\u001b[1;32m 450\u001b[0m callbacks\u001b[39m=\u001b[39;49mcallbacks,\n\u001b[1;32m 451\u001b[0m gc_after_trial\u001b[39m=\u001b[39;49mgc_after_trial,\n\u001b[1;32m 452\u001b[0m show_progress_bar\u001b[39m=\u001b[39;49mshow_progress_bar,\n\u001b[1;32m 453\u001b[0m )\n",
|
||||
"File \u001b[0;32m~/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/optuna/study/_optimize.py:66\u001b[0m, in \u001b[0;36m_optimize\u001b[0;34m(study, func, n_trials, timeout, n_jobs, catch, callbacks, gc_after_trial, show_progress_bar)\u001b[0m\n\u001b[1;32m 64\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 65\u001b[0m \u001b[39mif\u001b[39;00m n_jobs \u001b[39m==\u001b[39m \u001b[39m1\u001b[39m:\n\u001b[0;32m---> 66\u001b[0m _optimize_sequential(\n\u001b[1;32m 67\u001b[0m study,\n\u001b[1;32m 68\u001b[0m func,\n\u001b[1;32m 69\u001b[0m n_trials,\n\u001b[1;32m 70\u001b[0m timeout,\n\u001b[1;32m 71\u001b[0m catch,\n\u001b[1;32m 72\u001b[0m callbacks,\n\u001b[1;32m 73\u001b[0m gc_after_trial,\n\u001b[1;32m 74\u001b[0m reseed_sampler_rng\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m,\n\u001b[1;32m 75\u001b[0m time_start\u001b[39m=\u001b[39;49m\u001b[39mNone\u001b[39;49;00m,\n\u001b[1;32m 76\u001b[0m progress_bar\u001b[39m=\u001b[39;49mprogress_bar,\n\u001b[1;32m 77\u001b[0m )\n\u001b[1;32m 78\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 79\u001b[0m \u001b[39mif\u001b[39;00m n_jobs \u001b[39m==\u001b[39m \u001b[39m-\u001b[39m\u001b[39m1\u001b[39m:\n",
|
||||
"File \u001b[0;32m~/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/optuna/study/_optimize.py:163\u001b[0m, in \u001b[0;36m_optimize_sequential\u001b[0;34m(study, func, n_trials, timeout, catch, callbacks, gc_after_trial, reseed_sampler_rng, time_start, progress_bar)\u001b[0m\n\u001b[1;32m 160\u001b[0m \u001b[39mbreak\u001b[39;00m\n\u001b[1;32m 162\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 163\u001b[0m frozen_trial \u001b[39m=\u001b[39m _run_trial(study, func, catch)\n\u001b[1;32m 164\u001b[0m \u001b[39mfinally\u001b[39;00m:\n\u001b[1;32m 165\u001b[0m \u001b[39m# The following line mitigates memory problems that can be occurred in some\u001b[39;00m\n\u001b[1;32m 166\u001b[0m \u001b[39m# environments (e.g., services that use computing containers such as GitHub Actions).\u001b[39;00m\n\u001b[1;32m 167\u001b[0m \u001b[39m# Please refer to the following PR for further details:\u001b[39;00m\n\u001b[1;32m 168\u001b[0m \u001b[39m# https://github.com/optuna/optuna/pull/325.\u001b[39;00m\n\u001b[1;32m 169\u001b[0m \u001b[39mif\u001b[39;00m gc_after_trial:\n",
|
||||
"File \u001b[0;32m~/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/optuna/study/_optimize.py:251\u001b[0m, in \u001b[0;36m_run_trial\u001b[0;34m(study, func, catch)\u001b[0m\n\u001b[1;32m 244\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39mFalse\u001b[39;00m, \u001b[39m\"\u001b[39m\u001b[39mShould not reach.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 246\u001b[0m \u001b[39mif\u001b[39;00m (\n\u001b[1;32m 247\u001b[0m frozen_trial\u001b[39m.\u001b[39mstate \u001b[39m==\u001b[39m TrialState\u001b[39m.\u001b[39mFAIL\n\u001b[1;32m 248\u001b[0m \u001b[39mand\u001b[39;00m func_err \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m 249\u001b[0m \u001b[39mand\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39misinstance\u001b[39m(func_err, catch)\n\u001b[1;32m 250\u001b[0m ):\n\u001b[0;32m--> 251\u001b[0m \u001b[39mraise\u001b[39;00m func_err\n\u001b[1;32m 252\u001b[0m \u001b[39mreturn\u001b[39;00m frozen_trial\n",
|
||||
"File \u001b[0;32m~/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/optuna/study/_optimize.py:200\u001b[0m, in \u001b[0;36m_run_trial\u001b[0;34m(study, func, catch)\u001b[0m\n\u001b[1;32m 198\u001b[0m \u001b[39mwith\u001b[39;00m get_heartbeat_thread(trial\u001b[39m.\u001b[39m_trial_id, study\u001b[39m.\u001b[39m_storage):\n\u001b[1;32m 199\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 200\u001b[0m value_or_values \u001b[39m=\u001b[39m func(trial)\n\u001b[1;32m 201\u001b[0m \u001b[39mexcept\u001b[39;00m exceptions\u001b[39m.\u001b[39mTrialPruned \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 202\u001b[0m \u001b[39m# TODO(mamu): Handle multi-objective cases.\u001b[39;00m\n\u001b[1;32m 203\u001b[0m state \u001b[39m=\u001b[39m TrialState\u001b[39m.\u001b[39mPRUNED\n",
|
||||
"Cell \u001b[0;32mIn[4], line 16\u001b[0m, in \u001b[0;36mobjective\u001b[0;34m(trial)\u001b[0m\n\u001b[1;32m 12\u001b[0m logger \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m 13\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 14\u001b[0m \u001b[39m# Select hyperparameters for testing\u001b[39;00m\n\u001b[1;32m 15\u001b[0m in_act \u001b[39m=\u001b[39m (\n\u001b[0;32m---> 16\u001b[0m activation_dispatcher[trial\u001b[39m.\u001b[39;49msuggest_categorical(\u001b[39m\"\u001b[39;49m\u001b[39min_act\u001b[39;49m\u001b[39m\"\u001b[39;49m, activation_vals)],\n\u001b[1;32m 17\u001b[0m (),\n\u001b[1;32m 18\u001b[0m {},\n\u001b[1;32m 19\u001b[0m )\n\u001b[1;32m 20\u001b[0m compressor_kernel_size \u001b[39m=\u001b[39m trial\u001b[39m.\u001b[39msuggest_int(\n\u001b[1;32m 21\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mcompressor_kernel_size\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m64\u001b[39m, \u001b[39m257\u001b[39m, \u001b[39m64\u001b[39m\n\u001b[1;32m 22\u001b[0m )\n\u001b[1;32m 23\u001b[0m compressor_chunk_size \u001b[39m=\u001b[39m \u001b[39m128\u001b[39m\n",
"File \u001b[0;32m~/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/optuna/trial/_trial.py:405\u001b[0m, in \u001b[0;36mTrial.suggest_categorical\u001b[0;34m(self, name, choices)\u001b[0m\n\u001b[1;32m 354\u001b[0m \u001b[39m\u001b[39m\u001b[39m\"\"\"Suggest a value for the categorical parameter.\u001b[39;00m\n\u001b[1;32m 355\u001b[0m \n\u001b[1;32m 356\u001b[0m \u001b[39mThe value is sampled from ``choices``.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 400\u001b[0m \u001b[39m :ref:`configurations` tutorial describes more details and flexible usages.\u001b[39;00m\n\u001b[1;32m 401\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m 402\u001b[0m \u001b[39m# There is no need to call self._check_distribution because\u001b[39;00m\n\u001b[1;32m 403\u001b[0m \u001b[39m# CategoricalDistribution does not support dynamic value space.\u001b[39;00m\n\u001b[0;32m--> 405\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_suggest(name, CategoricalDistribution(choices\u001b[39m=\u001b[39;49mchoices))\n",
"File \u001b[0;32m~/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/optuna/trial/_trial.py:630\u001b[0m, in \u001b[0;36mTrial._suggest\u001b[0;34m(self, name, distribution)\u001b[0m\n\u001b[1;32m 628\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 629\u001b[0m study \u001b[39m=\u001b[39m pruners\u001b[39m.\u001b[39m_filter_study(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstudy, trial)\n\u001b[0;32m--> 630\u001b[0m param_value \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mstudy\u001b[39m.\u001b[39;49msampler\u001b[39m.\u001b[39;49msample_independent(\n\u001b[1;32m 631\u001b[0m study, trial, name, distribution\n\u001b[1;32m 632\u001b[0m )\n\u001b[1;32m 634\u001b[0m \u001b[39m# `param_value` is validated here (invalid value like `np.nan` raises ValueError).\u001b[39;00m\n\u001b[1;32m 635\u001b[0m param_value_in_internal_repr \u001b[39m=\u001b[39m distribution\u001b[39m.\u001b[39mto_internal_repr(param_value)\n",
"File \u001b[0;32m~/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/optuna/samplers/_tpe/sampler.py:471\u001b[0m, in \u001b[0;36mTPESampler.sample_independent\u001b[0;34m(self, study, trial, param_name, param_distribution)\u001b[0m\n\u001b[1;32m 467\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 468\u001b[0m mpe_below \u001b[39m=\u001b[39m _ParzenEstimator(\n\u001b[1;32m 469\u001b[0m below, {param_name: param_distribution}, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_parzen_estimator_parameters\n\u001b[1;32m 470\u001b[0m )\n\u001b[0;32m--> 471\u001b[0m mpe_above \u001b[39m=\u001b[39m _ParzenEstimator(\n\u001b[1;32m 472\u001b[0m above, {param_name: param_distribution}, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_parzen_estimator_parameters\n\u001b[1;32m 473\u001b[0m )\n\u001b[1;32m 474\u001b[0m samples_below \u001b[39m=\u001b[39m mpe_below\u001b[39m.\u001b[39msample(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_rng, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_n_ei_candidates)\n\u001b[1;32m 475\u001b[0m log_likelihoods_below \u001b[39m=\u001b[39m mpe_below\u001b[39m.\u001b[39mlog_pdf(samples_below)\n",
"File \u001b[0;32m~/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/optuna/samplers/_tpe/parzen_estimator.py:75\u001b[0m, in \u001b[0;36m_ParzenEstimator.__init__\u001b[0;34m(self, observations, search_space, parameters, predetermined_weights)\u001b[0m\n\u001b[1;32m 71\u001b[0m weights \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39mappend(weights, [parameters\u001b[39m.\u001b[39mprior_weight])\n\u001b[1;32m 72\u001b[0m weights \u001b[39m/\u001b[39m\u001b[39m=\u001b[39m weights\u001b[39m.\u001b[39msum()\n\u001b[1;32m 73\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_mixture_distribution \u001b[39m=\u001b[39m _MixtureOfProductDistribution(\n\u001b[1;32m 74\u001b[0m weights\u001b[39m=\u001b[39mweights,\n\u001b[0;32m---> 75\u001b[0m distributions\u001b[39m=\u001b[39m[\n\u001b[1;32m 76\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_calculate_distributions(\n\u001b[1;32m 77\u001b[0m transformed_observations[:, i], search_space[param], parameters\n\u001b[1;32m 78\u001b[0m )\n\u001b[1;32m 79\u001b[0m \u001b[39mfor\u001b[39;49;00m i, param \u001b[39min\u001b[39;49;00m \u001b[39menumerate\u001b[39;49m(search_space)\n\u001b[1;32m 80\u001b[0m ],\n\u001b[1;32m 81\u001b[0m )\n",
"File \u001b[0;32m~/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/optuna/samplers/_tpe/parzen_estimator.py:76\u001b[0m, in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 71\u001b[0m weights \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39mappend(weights, [parameters\u001b[39m.\u001b[39mprior_weight])\n\u001b[1;32m 72\u001b[0m weights \u001b[39m/\u001b[39m\u001b[39m=\u001b[39m weights\u001b[39m.\u001b[39msum()\n\u001b[1;32m 73\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_mixture_distribution \u001b[39m=\u001b[39m _MixtureOfProductDistribution(\n\u001b[1;32m 74\u001b[0m weights\u001b[39m=\u001b[39mweights,\n\u001b[1;32m 75\u001b[0m distributions\u001b[39m=\u001b[39m[\n\u001b[0;32m---> 76\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_calculate_distributions(\n\u001b[1;32m 77\u001b[0m transformed_observations[:, i], search_space[param], parameters\n\u001b[1;32m 78\u001b[0m )\n\u001b[1;32m 79\u001b[0m \u001b[39mfor\u001b[39;00m i, param \u001b[39min\u001b[39;00m \u001b[39menumerate\u001b[39m(search_space)\n\u001b[1;32m 80\u001b[0m ],\n\u001b[1;32m 81\u001b[0m )\n",
"File \u001b[0;32m~/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/optuna/samplers/_tpe/parzen_estimator.py:154\u001b[0m, in \u001b[0;36m_ParzenEstimator._calculate_distributions\u001b[0;34m(self, transformed_observations, search_space, parameters)\u001b[0m\n\u001b[1;32m 147\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_calculate_distributions\u001b[39m(\n\u001b[1;32m 148\u001b[0m \u001b[39mself\u001b[39m,\n\u001b[1;32m 149\u001b[0m transformed_observations: np\u001b[39m.\u001b[39mndarray,\n\u001b[1;32m 150\u001b[0m search_space: BaseDistribution,\n\u001b[1;32m 151\u001b[0m parameters: _ParzenEstimatorParameters,\n\u001b[1;32m 152\u001b[0m ) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m _BatchedDistributions:\n\u001b[1;32m 153\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(search_space, CategoricalDistribution):\n\u001b[0;32m--> 154\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_calculate_categorical_distributions(\n\u001b[1;32m 155\u001b[0m transformed_observations, search_space\u001b[39m.\u001b[39;49mchoices, parameters\n\u001b[1;32m 156\u001b[0m )\n\u001b[1;32m 157\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 158\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39misinstance\u001b[39m(search_space, (FloatDistribution, IntDistribution))\n",
"File \u001b[0;32m~/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/optuna/samplers/_tpe/parzen_estimator.py:192\u001b[0m, in \u001b[0;36m_ParzenEstimator._calculate_categorical_distributions\u001b[0;34m(self, observations, choices, parameters)\u001b[0m\n\u001b[1;32m 186\u001b[0m weights \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39mfull(\n\u001b[1;32m 187\u001b[0m shape\u001b[39m=\u001b[39m(\u001b[39mlen\u001b[39m(observations) \u001b[39m+\u001b[39m consider_prior, \u001b[39mlen\u001b[39m(choices)),\n\u001b[1;32m 188\u001b[0m fill_value\u001b[39m=\u001b[39mparameters\u001b[39m.\u001b[39mprior_weight \u001b[39m/\u001b[39m (\u001b[39mlen\u001b[39m(observations) \u001b[39m+\u001b[39m consider_prior),\n\u001b[1;32m 189\u001b[0m )\n\u001b[1;32m 191\u001b[0m weights[np\u001b[39m.\u001b[39marange(\u001b[39mlen\u001b[39m(observations)), observations\u001b[39m.\u001b[39mastype(\u001b[39mint\u001b[39m)] \u001b[39m+\u001b[39m\u001b[39m=\u001b[39m \u001b[39m1\u001b[39m\n\u001b[0;32m--> 192\u001b[0m weights \u001b[39m/\u001b[39m\u001b[39m=\u001b[39m weights\u001b[39m.\u001b[39;49msum(axis\u001b[39m=\u001b[39;49m\u001b[39m1\u001b[39;49m, keepdims\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m)\n\u001b[1;32m 193\u001b[0m \u001b[39mreturn\u001b[39;00m _BatchedCategoricalDistributions(weights)\n",
"File \u001b[0;32m~/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/numpy/core/_methods.py:47\u001b[0m, in \u001b[0;36m_sum\u001b[0;34m(a, axis, dtype, out, keepdims, initial, where)\u001b[0m\n\u001b[1;32m 43\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_amin\u001b[39m(a, axis\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, out\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, keepdims\u001b[39m=\u001b[39m\u001b[39mFalse\u001b[39;00m,\n\u001b[1;32m 44\u001b[0m initial\u001b[39m=\u001b[39m_NoValue, where\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m):\n\u001b[1;32m 45\u001b[0m \u001b[39mreturn\u001b[39;00m umr_minimum(a, axis, \u001b[39mNone\u001b[39;00m, out, keepdims, initial, where)\n\u001b[0;32m---> 47\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_sum\u001b[39m(a, axis\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, dtype\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, out\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, keepdims\u001b[39m=\u001b[39m\u001b[39mFalse\u001b[39;00m,\n\u001b[1;32m 48\u001b[0m initial\u001b[39m=\u001b[39m_NoValue, where\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m):\n\u001b[1;32m 49\u001b[0m \u001b[39mreturn\u001b[39;00m umr_sum(a, axis, dtype, out, keepdims, initial, where)\n\u001b[1;32m 51\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_prod\u001b[39m(a, axis\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, dtype\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, out\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, keepdims\u001b[39m=\u001b[39m\u001b[39mFalse\u001b[39;00m,\n\u001b[1;32m 52\u001b[0m initial\u001b[39m=\u001b[39m_NoValue, where\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m):\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
]
}
],
"source": [
"# storage_name = \"sqlite:///optuna.sql\"\n",
"storage_name = \"mysql+pymysql://root:Ch31121992@192.168.1.10:3306/optuna_db\"\n",
"study_name = \"Experiment 2\"\n",
"study = optuna.create_study(\n",
"    study_name=study_name,\n",
"    storage=storage_name,\n",
"    direction=\"minimize\",\n",
"    pruner=HyperbandPruner(),\n",
"    load_if_exists=True,\n",
")\n",
"study.optimize(\n",
"    objective,\n",
"    n_trials=None,\n",
"    timeout=None,\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
521
expt2.py
Normal file
@@ -0,0 +1,521 @@
# %% [markdown]
# <h1>Experiment 2</h1>
# <h3>Initial hyperparameter tuning</h3>
# <p>Summary</p>
# <ul>
# <li>A model was created with a dynamic constructor, allowing for a hyperparameter-driven model</li>
# <li>Hyperparameters were tuned using <code>`Optuna`</code></li>
# <li>The training loop was constructed using <code>`PyTorchLightning`</code></li>
# <li>The model was trained on a cluster of machines using a shared SQL trial database</li>
# <li>An extremely aggressive pruning algorithm was used to quickly narrow in on an optimal hyperparameter space</li>
# <li>Experiment 2 was left to train on the cluster for 2 days</li>
# </ul>

# %%
# Data handling imports
from dask.distributed import Client, LocalCluster
import dask
import dask.dataframe as dd
import dask.array as da
import numpy as np
import pickle
import random
from itertools import chain
from tqdm.auto import tqdm

# Deep learning imports
import torch
from torch.utils.data import DataLoader
from torch import nn
from torch.nn import functional as F
from torch import optim
import pytorch_lightning as pl
from pytorch_lightning import Trainer
import optuna
from optuna.pruners import HyperbandPruner
from optuna.integration import PyTorchLightningPruningCallback


# Suppress some warning messages from pytorch_lightning,
# It really doesn't like that I've forced it to handle a dask array!
import warnings

warnings.filterwarnings("ignore", category=UserWarning, module=pl.__name__)

# Also, set up a log to record debug messages for failed trials
import logging

logging.basicConfig(filename="debug.log", encoding="utf-8", level=logging.ERROR)

# %% [markdown]
# <h3>Patching PyTorchLightning</h3>
# <p>
# A key part of this project was to develop a patch for PyTorchLightning to allow for the use of <code>`dask`</code> arrays as inputs. It was important that PyTorchLightning be able to accept <code>`dask`</code> arrays and only load the data into memory when needed; otherwise, our extremely large datasets would simply crash our system, as they are significantly larger than the available RAM and VRAM.
# </p><p>
# After several versions of the patch, this final version was developed. It is a simple monkey patch that wraps the <code>pytorch_lightning.utilities.data._extract_batch_size</code> generator with a check that mimics the expected behaviour for torch tensors when given a dask array, and extends its type signature to ensure static analysis is still possible.
# </p><p>
# With this patch applied, the forward method in our model can accept a dask array and only compute each chunk of the array when needed. This allows us to train our model on datasets that are significantly larger than the available memory. A quick sanity check of the patched behaviour follows the patch below.
# </p>

# %%
# Monkey patch to allow pytorch lightning to accept a dask array as a model input
from typing import Any, Generator, Iterable, Mapping, Optional, Union

BType = Union[da.Array, torch.Tensor, str, Mapping[Any, "BType"], Iterable["BType"]]

unpatched = pl.utilities.data._extract_batch_size


def patch(batch: BType) -> Generator[Optional[int], None, None]:
    if isinstance(batch, da.core.Array):
        if len(batch.shape) == 0:
            yield 1
        else:
            yield batch.shape[0]
    else:
        yield from unpatched(batch)


pl.utilities.data._extract_batch_size = patch
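
# %% [markdown]
# <p>
# As a quick, hypothetical sanity check of the patched behaviour (not part of the
# original script): a 2D dask array should now report its leading dimension as its
# batch size, exactly as a torch tensor would.
# </p>

# %%
# Hypothetical sanity check for the monkey patch above
assert next(pl.utilities.data._extract_batch_size(da.zeros((8, 5)))) == 8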

# %%
# Set the device to use with torch
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


# Prepare a dask cluster and client
def create_client():
    cluster = LocalCluster(n_workers=2, threads_per_worker=1)
    client = Client(cluster)
    return client


if __name__ == "__main__":
    client = create_client()

# %%
# Load X and y for training
samples = list(range(1, 82))

with open("sample_X.pkl", "rb") as f:
    X = pickle.load(f)

with open("sample_y.pkl", "rb") as f:
    y = pickle.load(f)

# %% [markdown]
# <h3>Dataset Splitting</h3>
# <p>The dataset is split into a training and validation dataset (80:20 split). Because the number of available samples is extremely small, we haven't produced a test dataset. In the future, as more data is obtained, a test set should be included whenever possible.</p>

# %%
# Separate samples into training and validation sets
val_samples = random.sample(samples, k=len(samples) // 5)
train_samples = [s for s in samples if s not in val_samples]

X_train = {i: X[i] for i in train_samples}
X_val = {i: X[i] for i in val_samples}
y_train = {i: y[i] for i in train_samples}
y_val = {i: y[i] for i in val_samples}

# %% [markdown]
# <h3>Dataset Collation</h3>
# <p>This function collates our data in a torch DataLoader. The use of a DataLoader allows us to shuffle and prefetch data, reducing overfitting and maximising performance, as IO will be a bottleneck. In earlier experiments the collation function was dynamically constructed as a closure, allowing us to select the outputs we train against; for this experiment, however, we will match against all outputs for simplicity.</p>

# %%
# Collation function for the torch DataLoaders
def collate_fn(batch):
    X0 = batch[0][0][0].to_numpy(dtype=np.float32)[0]
    X1 = batch[0][0][1].to_dask_array(lengths=True)
    y = batch[0][1].to_numpy(dtype=np.float32)
    return (
        torch.from_numpy(X0).to(device),
        X1,
        torch.from_numpy(y).to(device),
    )


# %% [markdown]
# <h3>Convolutional Data Compression</h3>
# <p>
# The <code>`DaskCompression`</code> module accepts a dask array and applies a convolutional kernel to it to significantly compress the input data. This allows us to transform a larger-than-VRAM dataset into one that can fit on our GPU, and (hopefully) retain the relevant information to train the rest of our model on.
# </p><p>
# Note how each kernel is only computed inside <code>`compress_kernel`</code> (via <code>`kernel.compute()`</code>) and is immediately compressed via convolution. This ensures that only one kernel needs to be stored in memory at a time, avoiding the need to hold the entire dataset in memory at once.
# </p>

# %%
class DaskCompression(nn.Module):
    def __init__(
        self, in_channels, out_channels, kernel_size, chunk_size=1, device=device
    ):
        super().__init__()
        self.kernel_size = kernel_size
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.chunk_size = chunk_size
        self.device = device
        self.conv = nn.Conv1d(in_channels, out_channels, kernel_size).to(device)

    def compress_kernel(self, kernel):
        return (
            self.conv(torch.from_numpy(kernel.compute()).to(self.device))
            .squeeze()
            .to("cpu")  # return to cpu to save VRAM
        )

    def forward(self, x):
        # Precompute the dimensions of the output array
        dim0, dim2 = x.shape
        assert dim2 == self.in_channels
        dim0 = (dim0 // self.kernel_size) // self.chunk_size
        x = x.reshape(dim0, self.chunk_size, self.kernel_size, dim2)
        x = da.transpose(x, axes=(0, 1, 3, 2))

        x = [self.compress_kernel(kernel) for kernel in x]
        return torch.stack(x).to(self.device)
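

# %% [markdown]
# <p>
# As a quick, hypothetical shape check (not part of the original experiment): two chunks
# of three kernels of width four, with five input channels, compress down to a tensor of
# shape <code>(n_chunks, chunk_size, out_channels)</code>.
# </p>

# %%
# Hypothetical shape check for DaskCompression, pinned to the CPU for simplicity
_comp = DaskCompression(5, 5, kernel_size=4, chunk_size=3, device=torch.device("cpu"))
_x = da.zeros((2 * 3 * 4, 5), chunks=(4, 5), dtype=np.float32)
assert _comp(_x).shape == (2, 3, 5)  # (n_chunks, chunk_size, out_channels)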


# %% [markdown]
# <h3>Model Design</h3>
# <p>
# The model was designed to be a dynamically constructed, hyperparameter-driven model for ease of hyperparameter optimisation. The constructed model will process data in the following way:
# </p>
# <ol>
# <li>The input is left/right padded to a multiple of the compressor kernel size</li>
# <li>The dask array is compressed by a <code>`DaskCompression`</code> layer, treating each input as a channel</li>
# <li>The compressed array is then recursively convolved down to a size less than or equal to the width of our feedforward network</li>
# <li>The channels of the now convolved data are combined</li>
# <li>The combined, flattened data is then left/right padded to the width of the feedforward network</li>
# <li>Finally, the data is fed into a feedforward network</li>
# </ol>
# <p>
# This relatively simple design allows the network to accept both larger-than-RAM datasets as inputs, and datasets of variable sizes. This makes it suitable for training on whole Aconity datasets, without the need for culling or binning.
# </p>

# %%
class Model(pl.LightningModule):
    def __init__(
        self,
        # pl attributes
        optimizer=torch.optim.Adam,
        optimizer_args=(),
        optimizer_kwargs={},
        scheduler=None,
        scheduler_kwargs={},
        loss=torch.nn.MSELoss(),
        train_ds=None,
        val_ds=None,
        # model args & kwargs
        compressor_kernel_size=128,
        compressor_chunk_size=128,
        compressor_act=(nn.ReLU, (), {}),
        conv_kernel_size=128,
        conv_norm=False,
        conv_act=(nn.ReLU, (), {}),
        channel_combine_act=(nn.ReLU, (), {}),
        param_ff_depth=4,
        param_ff_width=16,
        param_ff_act=(nn.ReLU, (), {}),
        ff_width=512,
        ff_depth=4,
        ff_act=(nn.ReLU, (), {}),
        out_size=6,
        out_act=(nn.ReLU, (), {}),
    ):
        super().__init__()
        # Assign necessary attributes for pl model
        self.optimizer = optimizer
        self.optimizer_args = optimizer_args
        self.optimizer_kwargs = optimizer_kwargs
        self.scheduler = scheduler
        self.scheduler_kwargs = scheduler_kwargs
        self.loss = loss
        self.train_ds = train_ds
        self.val_ds = val_ds
        # Attrs for dynamically created model to be tested
        self.compressor_kernel_size = compressor_kernel_size
        self.compressor_chunk_size = compressor_chunk_size
        self.conv_kernel_size = conv_kernel_size
        self.ff_width = ff_width
        self.ff_depth = ff_depth
        self.out_size = out_size
        # layers
        # compressor compresses and converts dask array to torch tensor
        self.convolutional_compressor = DaskCompression(
            5,
            5,
            kernel_size=compressor_kernel_size,
            chunk_size=compressor_chunk_size,
        )
        self.compressor_act = compressor_act[0](*compressor_act[1], **compressor_act[2])
        # convolutional layer recursively applies convolutions to the compressed input
        self.conv = nn.Conv1d(5, 5, kernel_size=conv_kernel_size)
        self.conv_norm = nn.LocalResponseNorm(5) if conv_norm else nn.Identity()
        self.conv_act = conv_act[0](*conv_act[1], **conv_act[2])
        self.combine_channels = nn.Conv1d(5, 1, kernel_size=1)
        self.channel_combine_act = channel_combine_act[0](
            *channel_combine_act[1], **channel_combine_act[2]
        )
        self.param_ff = nn.Sequential(
            nn.Linear(4, param_ff_width),
            param_ff_act[0](*param_ff_act[1], **param_ff_act[2]),
            *chain(
                *(
                    (
                        nn.Linear(param_ff_width, param_ff_width),
                        param_ff_act[0](*param_ff_act[1], **param_ff_act[2]),
                    )
                    for _ in range(param_ff_depth)
                )
            ),
        )
        self.ff = nn.Sequential(
            nn.Linear(ff_width + param_ff_width, ff_width),
            ff_act[0](*ff_act[1], **ff_act[2]),
            *chain(
                *(
                    (
                        nn.Linear(ff_width, ff_width),
                        ff_act[0](*ff_act[1], **ff_act[2]),
                    )
                    for _ in range(ff_depth)
                )
            ),
        )
        self.out_dense = nn.Linear(ff_width, out_size)
        self.out_act = out_act[0](*out_act[1], **out_act[2])

    @staticmethod
    def pad_ax0_to_multiple_of(x, multiple_of):
        padding = (((x.shape[0] // multiple_of) + 1) * multiple_of) - x.shape[0]
        left_pad = padding // 2
        right_pad = padding - left_pad
        return da.pad(
            x, ((left_pad, right_pad), (0, 0)), mode="constant", constant_values=0
        )

    def pad_to_ff_width(self, x):
        padding = self.ff_width - x.shape[1]
        left_pad = padding // 2
        right_pad = padding - left_pad
        return F.pad(
            x,
            (right_pad, left_pad, 0, 0),
            mode="constant",
            value=0.0,
        )

    def forward(self, x0, x1):
        # pad to a multiple of kernel_size * chunk_size
        x1 = self.pad_ax0_to_multiple_of(
            x1, self.compressor_kernel_size * self.compressor_chunk_size
        )
        x1 = self.convolutional_compressor(x1)
        x1 = x1.reshape(x1.shape[0] * x1.shape[1], x1.shape[2]).T.unsqueeze(0)
        while x1.shape[2] > self.ff_width:
            x1 = self.conv(x1)
            x1 = self.conv_norm(x1)
            x1 = self.conv_act(x1)
        x1 = self.combine_channels(x1)
        x1 = self.channel_combine_act(x1)
        x1 = x1.squeeze(1)
        x1 = self.pad_to_ff_width(x1)
        x0 = x0.unsqueeze(0)
        x0 = self.param_ff(x0)
        x = torch.cat((x1, x0), dim=1)
        x = self.ff(x)
        x = self.out_dense(x)
        x = self.out_act(x)
        return x

    def configure_optimizers(self):
        optimizer = self.optimizer(
            self.parameters(), *self.optimizer_args, **self.optimizer_kwargs
        )
        if self.scheduler is not None:
            scheduler = self.scheduler(optimizer, **self.scheduler_kwargs)
            # Lightning expects the dict format here; schedulers like
            # ReduceLROnPlateau also need a metric to monitor
            return {
                "optimizer": optimizer,
                "lr_scheduler": {"scheduler": scheduler, "monitor": "val_loss"},
            }
        else:
            return optimizer

    def train_dataloader(self):
        return self.train_ds

    def val_dataloader(self):
        return self.val_ds

    def training_step(self, batch, batch_idx):
        x0, x1, y = batch
        y_hat = self(x0, x1)
        loss = self.loss(y_hat, y)
        self.log("train_loss", loss)
        return loss

    def validation_step(self, batch, batch_idx):
        x0, x1, y = batch
        y_hat = self(x0, x1)
        loss = self.loss(y_hat, y)
        self.log("val_loss", loss)
        return loss


# %% [markdown]
# <h3>Activation Functions</h3>
# <p>
# For our hyperparameter optimisation, we intend to test all the activation functions in PyTorch. In addition to the builtin activations, we will also train using the following custom implemented activation functions from literature or our own design (a minimal sketch of one such module is given after the list):
# </p>
# <ol>
# <li><b><code>BMU</code>:</b> Bio-Mimicking Unit; an activation function designed to mimic the activation potential of a biological neuron.</li>
# <li><b><code>SoftExp</code>:</b> Soft Exponential function; a parametric activation function that fits to a wide variety of exponential curves (DOI: <a href=https://arxiv.org/abs/1602.01321v1>10.48550/arXiv.1602.01321</a>)</li>
# <li><b><code>LeakyPReQU</code>:</b> Leaky Parametric Rectified Quadratic Unit; a smoothly and continuously differentiable function that is a parametrically sloped line for <code>x≤0</code> and a quadratic curve for <code>x>0</code></li>
# <li><b><code>ISRU</code>:</b> Inverse Square Root Unit; a somewhat uncommon function that can be useful in models such as this as it yields a continuously differentiable curve while being extremely fast to compute using bit manipulation</li>
# <li><b><code>ISRLU</code>:</b> Inverse Square Root Linear Unit; a modified ISRU that is an ISRU for <code>x<0</code> and <code>`f(x)=x`</code> for <code>x≥0</code> (DOI: <a href=https://arxiv.org/abs/1710.09967>10.48550/arXiv.1710.09967</a>)</li>
# <li><b><code>PBessel</code>:</b> Parametric Bessel; a parametric Bessel curve yielding various different wave formations depending on a trainable parameter</li>
# <li><b><code>Sinusoid</code>:</b> A parametric sine wave, with amplitude and wavelength as trainable parameters</li>
# <li><b><code>Modulo</code>:</b> A parametric sawtooth wave, <code>`f(x)=x%ɑ`</code> where ɑ is a trainable parameter</li>
# <li><b><code>TriWave</code>:</b> A parametric triangle wave, with amplitude and wavelength as trainable parameters</li>
# <li><b><code>Gaussian</code>:</b> A parametric Gaussian curve, with trainable amplitude</li>
# </ol>
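
# %% [markdown]
# <p>
# For illustration, a minimal sketch of what one of these custom modules might look like
# (the committed implementations live in <code>`custom_activations.py`</code> and may
# differ in detail):
# </p>

# %%
class ISRUSketch(nn.Module):
    """Hypothetical sketch of an Inverse Square Root Unit: f(x) = x / sqrt(1 + a*x^2)."""

    def __init__(self, alpha=1.0):
        super().__init__()
        # alpha is trainable, mirroring the parametric activations described above
        self.alpha = nn.Parameter(torch.tensor(float(alpha)))

    def forward(self, x):
        return x / torch.sqrt(1.0 + self.alpha * x * x)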

# %%
# Create a dispatcher of builtin activations and
# several custom activations from experimentation or literature
from custom_activations import SoftExp, PBessel


activation_dispatcher = {
    "Tanh": nn.Tanh,
    "SiLU": nn.SiLU,
    "Softplus": nn.Softplus,
    "SoftExp": SoftExp,
    "PBessel": PBessel,
}

out_act_dispatcher = {
    "Sigmoid": (nn.Sigmoid, (), {}),
    "Softmax": (nn.Softmax, (), {"dim": 1}),
}

# %% [markdown]
# <h3>Hyperparameter Training</h3>
# <p>Here, we define an objective function, describing what we want Optuna to do during each trial and how to react to various errors and/or situations that may arise. To summarise the objective:</p>
# <ul>
# <li>Optuna selects hyperparameters for all input parameters within the given constraints</li>
# <li>A model is generated using the selected hyperparameters</li>
# <li>PyTorchLightning trains the model through 2 epochs</li>
# <li>The model is evaluated on the validation set</li>
# <li>The validation loss is returned to Optuna</li>
# </ul>
# <p>
# Optuna monitors the reported validation loss and attempts to minimise it. An extremely aggressive pruning strategy known as "hyperband pruning" is used to efficiently reduce the parameter space to something more reasonable. Any parameter set which Optuna deems suboptimal will be immediately pruned or even stopped early to save time.
# </p>

# %%
# Test parameters
n_epochs = 2
output_keys = list(next(iter(y_train.values())).keys())
activation_vals = list(activation_dispatcher.keys())
out_act_vals = list(out_act_dispatcher.keys())


# Next we define the objective function for the hyperparameter optimization
def objective(trial):
    torch.cuda.empty_cache()
    objective_value = torch.inf
    model = None
    logger = None
    try:
        # Select hyperparameters for testing
        compressor_kernel_size = 128
        compressor_chunk_size = 128
        compressor_act = (SoftExp, (), {})
        conv_kernel_size = 128
        conv_norm = False
        conv_act = (nn.Tanh, (), {})
        channel_combine_act = (nn.Softplus, (), {})
        param_ff_depth = 2
        param_ff_width = 16
        param_ff_act = (PBessel, (), {})
        ff_width = 1024
        ff_depth = 4
        ff_act = (nn.Softplus, (), {})
        out_size = 2
        out_act = out_act_dispatcher[trial.suggest_categorical("out_act", out_act_vals)]

        # Set up the model architecture and other necessary components
        model = Model(
            compressor_kernel_size=compressor_kernel_size,
            compressor_chunk_size=compressor_chunk_size,
            compressor_act=compressor_act,
            conv_kernel_size=conv_kernel_size,
            conv_act=conv_act,
            conv_norm=conv_norm,
            channel_combine_act=channel_combine_act,
            param_ff_depth=param_ff_depth,
            param_ff_width=param_ff_width,
            param_ff_act=param_ff_act,
            ff_width=ff_width,
            ff_depth=ff_depth,
            ff_act=ff_act,
            out_size=out_size,
            out_act=out_act,
        ).to(device)

        trainer = Trainer(
            accelerator="gpu",
            max_epochs=n_epochs,
            devices=1,
            logger=logger,
            num_sanity_val_steps=0,  # Needs to be disabled or else we get an error because X is dask array
            # precision="16-mixed",
            callbacks=[
                PyTorchLightningPruningCallback(trial, monitor="val_loss"),
            ],
        )
        # Prepare datasets
        train = DataLoader(
            list(zip(X_train.values(), y_train.values())),
            collate_fn=collate_fn,
            shuffle=True,
        )
        valid = DataLoader(
            list(zip(X_val.values(), y_val.values())),
            shuffle=True,
            collate_fn=collate_fn,
        )
        # Finally, train the model
        trainer.fit(model, train, valid)
        # Record the final validation loss so completed trials report a result
        objective_value = trainer.callback_metrics["val_loss"].item()
    except Exception as e:
        logging.exception(f"An exception occurred in trial {trial.number}: {e}")
        raise optuna.exceptions.TrialPruned()
    finally:
        if logger is not None:
            logger.experiment.unwatch(model)
            logger.experiment.finish()
        del model
        torch.cuda.empty_cache()
    if objective_value == torch.inf:
        raise optuna.exceptions.TrialPruned()
    return objective_value
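

# %% [markdown]
# <p>
# Note that most of the hyperparameters above are pinned to values chosen from earlier
# trials, with only <code>`out_act`</code> still being sampled. In earlier runs of this
# objective (see the interrupted trace recorded in <code>`expt2.ipynb`</code>) more of the
# space was sampled, along the lines of the hypothetical sketch below.
# </p>

# %%
# Hypothetical sketch (not the committed objective) of how the earlier runs sampled
# a wider search space, reconstructed from the interrupted trace in expt2.ipynb
def suggest_wider_space(trial):
    in_act = (
        activation_dispatcher[trial.suggest_categorical("in_act", activation_vals)],
        (),
        {},
    )
    compressor_kernel_size = trial.suggest_int("compressor_kernel_size", 64, 257, 64)
    return in_act, compressor_kernel_size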


# %% [markdown]
# <h3>Hyperparameter Optimisation on a Computing Cluster</h3>
# <p>
# The final important step is to run the optimisation using a cluster of computers to maximise the number of trials that can be run in parallel. Although this could be achieved using a more complex, scheduler-controlled system built on dask, we will use the far simpler approach of a shared SQL ledger to keep track of the trials and their results. This approach is sufficient for our purposes and easy to implement. Using it, the model was trained on a cluster of 5 computers at once.
# </p>

# %%
if __name__ == "__main__":
    # storage_name = "sqlite:///optuna.sql"
    storage_name = "mysql+pymysql://root:Ch31121992@192.168.1.10:3306/optuna_db"
    study_name = "Composition Experiment 2"
    study = optuna.create_study(
        study_name=study_name,
        storage=storage_name,
        direction="minimize",
        pruner=HyperbandPruner(),  # hyperband pruning, as described above
        load_if_exists=True,
    )
    study.optimize(
        objective,
        n_trials=10,
        timeout=None,
    )
7024
expt2_analysis.ipynb
Normal file
File diff suppressed because it is too large
398
expt3.ipynb
Normal file
@@ -0,0 +1,398 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<h1>Experiment 3</h1>\n",
"<h3>Optimised model training</h3>\n",
"<p>In experiment 3 the model was trained using the optimised hyperparameters. By examining the results of expt2, it was noticed that trials #1, #10, and #16 all resulted in quite low losses while also showing clearly discernible downward training curves. Of these, trial #16 was ultimately selected as the model to be tested, as the data suggests that <code>`in_act=Mish`</code> tends to give the lowest losses in most models tested. The parameters for trial #16 were as follows:</p>\n",
"<ul>\n",
"<li><b>in_act</b> = Mish</li>\n",
"<li><b>compressor_kernel_size</b> = 128</li>\n",
"<li><b>compressor_chunk_size</b> = 128</li>\n",
"<li><b>compressor_act</b> = SoftExp</li>\n",
"<li><b>conv_kernel_size</b> = 128</li>\n",
"<li><b>conv_act</b> = Sigmoid</li>\n",
"<li><b>channel_combine_act</b> = GELU</li>\n",
"<li><b>ff_width</b> = 512</li>\n",
"<li><b>ff_depth</b> = 2</li>\n",
"<li><b>ff_act</b> = CELU</li>\n",
"<li><b>out_act</b> = Tanhshrink</li>\n",
"</ul>\n",
"<p>\n",
"Because most of the training curves in expt2 appeared to be unstable, a learning rate scheduler was used to reduce the learning rate by 20% if the validation loss did not improve for 5 epochs. The model was checkpointed, with the best 10 iterations of the model being retained for testing after training.\n",
"</p>\n",
"<h3>Modified optimal model training</h3>\n",
"<p>\n",
"Following the first attempt at training the optimised model (Model 1, Test 1), it was noted that training curves were clearly discernible, but still quite unstable and noisy. To try and further improve the stability of the training, a modified version of the model was prepared and trained (Model 2, Test 2). The modified model was the same as Model 1, but with the addition of a LayerNormalization layer to the convolutional layer of the <code>`DaskCompression`</code> submodule. This change was made because highly recurrent submodules such as the compressor are known to be especially prone to instability caused by vanishing or exploding gradients. It was reasoned that by normalizing at each iteration the gradients would be less likely to vanish or explode, making the training more stable.\n",
"</p>"
]
},
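{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hypothetical sketch of the Model 2 change described above; the committed\n",
"# implementation may differ in detail, and importing DaskCompression from\n",
"# expt1 is an assumption for illustration.\n",
"from expt1 import DaskCompression\n",
"from torch import nn\n",
"\n",
"\n",
"class NormalisedDaskCompression(DaskCompression):\n",
"    def __init__(self, *args, **kwargs):\n",
"        super().__init__(*args, **kwargs)\n",
"        # Normalise the features of each compressed kernel to stabilise the\n",
"        # highly recurrent compression loop\n",
"        self.norm = nn.LayerNorm(self.out_channels)\n",
"\n",
"    def compress_kernel(self, kernel):\n",
"        return self.norm(super().compress_kernel(kernel))"
]
},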
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# Data handling imports\n",
"from dask.distributed import Client, LocalCluster\n",
"import dask\n",
"import dask.dataframe as dd\n",
"import dask.array as da\n",
"import numpy as np\n",
"import pickle\n",
"import random\n",
"from itertools import chain\n",
"from tqdm.auto import tqdm\n",
"\n",
"# Deep learning imports\n",
"import torch\n",
"from torch.utils.data import DataLoader\n",
"from torch import nn\n",
"from torch.nn import functional as F\n",
"from torch import optim\n",
"import pytorch_lightning as pl\n",
"from pytorch_lightning import Trainer\n",
"from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint\n",
"from pytorch_lightning.loggers import WandbLogger\n",
"\n",
"# Suppress some warning messages from pytorch_lightning,\n",
"# It really doesn't like that I've forced it to handle a dask array!\n",
"import warnings\n",
"\n",
"warnings.filterwarnings(\"ignore\", category=UserWarning, module=pl.__name__)\n",
"\n",
"# Also, set up a log to record debug messages for failed trials\n",
"import logging\n",
"\n",
"logging.basicConfig(filename=\"debug.log\", encoding=\"utf-8\", level=logging.DEBUG)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from expt1 import (\n",
"    Model,\n",
"    device,\n",
"    X_train,\n",
"    y_train,\n",
"    X_val,\n",
"    y_val,\n",
"    create_collate_fn,\n",
")\n",
"from custom_activations import SoftExp"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/cianh/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/distributed/node.py:182: UserWarning: Port 8787 is already in use.\n",
"Perhaps you already have a cluster running?\n",
"Hosting the HTTP server on port 34477 instead\n",
"  warnings.warn(\n"
]
}
],
"source": [
"cluster = LocalCluster(n_workers=8, threads_per_worker=1)\n",
"client = Client(cluster)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Monkey patch to allow pytorch lightning to accept a dask array as a model input\n",
"from typing import Any, Generator, Iterable, Mapping, Optional, Union\n",
"\n",
"BType = Union[da.Array, torch.Tensor, str, Mapping[Any, \"BType\"], Iterable[\"BType\"]]\n",
"\n",
"unpatched = pl.utilities.data._extract_batch_size\n",
"\n",
"\n",
"def patch(batch: BType) -> Generator[Optional[int], None, None]:\n",
"    if isinstance(batch, da.core.Array):\n",
"        if len(batch.shape) == 0:\n",
"            yield 1\n",
"        else:\n",
"            yield batch.shape[0]\n",
"    else:\n",
"        yield from unpatched(batch)\n",
"\n",
"\n",
"pl.utilities.data._extract_batch_size = patch"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Prepare datasets\n",
"train = DataLoader(\n",
"    list(zip(X_train.values(), y_train.values())),\n",
"    collate_fn=create_collate_fn(),\n",
"    shuffle=True,\n",
")\n",
"valid = DataLoader(\n",
"    list(zip(X_val.values(), y_val.values())),\n",
"    shuffle=True,\n",
"    collate_fn=create_collate_fn(),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Set up the model architecture and other necessary components\n",
"model = Model(\n",
"    # Training parameters\n",
"    optimizer=optim.Adam,\n",
"    scheduler=optim.lr_scheduler.ReduceLROnPlateau,\n",
"    scheduler_kwargs={\"factor\": 0.8, \"patience\": 5},\n",
"    # Model parameters\n",
"    in_act=(nn.Mish, list(), dict()),\n",
"    compressor_kernel_size=128,\n",
"    compressor_chunk_size=128,\n",
"    compressor_act=(SoftExp, list(), dict()),\n",
"    conv_kernel_size=128,\n",
"    conv_act=(nn.Sigmoid, list(), dict()),\n",
"    channel_combine_act=(nn.GELU, list(), dict()),\n",
"    ff_width=512,\n",
"    ff_depth=2,\n",
"    ff_act=(nn.CELU, list(), dict()),\n",
"    out_size=len(list(next(iter(y_train.values())).keys())),\n",
"    out_act=(nn.Tanhshrink, list(), dict()),\n",
").to(device)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mchughes000\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "6d1624339b4c4aaeb195b5ebc3b3e69e",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"VBox(children=(Label(value='Waiting for wandb.init()...\\r'), FloatProgress(value=0.016669258750092317, max=1.0…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"wandb version 0.15.8 is available! To upgrade, please run:\n",
" $ pip install wandb --upgrade"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Tracking run with wandb version 0.15.7"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Run data is saved locally in <code>./wandb/run-20230801_233841-q70oibx2</code>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Syncing run <strong><a href='https://wandb.ai/chughes000/Aconity_ML_Test_DryRun/runs/q70oibx2' target=\"_blank\">Test 2</a></strong> to <a href='https://wandb.ai/chughes000/Aconity_ML_Test_DryRun' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
" View project at <a href='https://wandb.ai/chughes000/Aconity_ML_Test_DryRun' target=\"_blank\">https://wandb.ai/chughes000/Aconity_ML_Test_DryRun</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
" View run at <a href='https://wandb.ai/chughes000/Aconity_ML_Test_DryRun/runs/q70oibx2' target=\"_blank\">https://wandb.ai/chughes000/Aconity_ML_Test_DryRun/runs/q70oibx2</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"GPU available: True (cuda), used: True\n",
"TPU available: False, using: 0 TPU cores\n",
"IPU available: False, using: 0 IPUs\n",
"HPU available: False, using: 0 HPUs\n"
]
}
],
"source": [
"early_stop_callback = EarlyStopping(\n",
"    monitor=\"val_loss\", patience=15, verbose=False, mode=\"min\"\n",
")\n",
"\n",
"checkpoint_callback = ModelCheckpoint(\n",
"    monitor=\"val_loss\",\n",
"    dirpath=\"./checkpoints\",\n",
"    filename=\"checkpoint-{epoch:02d}-{val_loss:.2f}\",\n",
"    save_top_k=10,\n",
"    mode=\"min\",\n",
")\n",
"\n",
"logger = WandbLogger(project=\"Aconity_ML_Test_DryRun\", name=\"Test 1\")\n",
"logger.experiment.watch(model, log=\"all\", log_freq=1)\n",
"\n",
"trainer = Trainer(\n",
"    accelerator=\"gpu\",\n",
"    max_epochs=-1,\n",
"    devices=\"auto\",\n",
"    strategy=\"auto\",\n",
"    logger=logger,\n",
"    callbacks=[checkpoint_callback, early_stop_callback],\n",
"    num_sanity_val_steps=0,  # Needs to be disabled or else we get an error because X is dask array\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n",
"/home/cianh/Programming/Git_Projects/Aconity_ML_Test/.venv/lib/python3.11/site-packages/pytorch_lightning/core/optimizer.py:361: RuntimeWarning: Found unsupported keys in the optimizer configuration: {'scheduler'}\n",
"  rank_zero_warn(\n",
"\n",
"   | Name                     | Type            | Params\n",
"--------------------------------------------------------------\n",
"0  | loss                     | MSELoss         | 0     \n",
"1  | in_act                   | Mish            | 0     \n",
"2  | convolutional_compressor | DaskCompression | 3.2 K \n",
"3  | compressor_act           | SoftExp         | 1     \n",
"4  | conv                     | Conv1d          | 3.2 K \n",
"5  | conv_act                 | Sigmoid         | 0     \n",
"6  | combine_channels         | Conv1d          | 6     \n",
"7  | channel_combine_act      | GELU            | 0     \n",
"8  | ff                       | Sequential      | 525 K \n",
"9  | out_dense                | Linear          | 11.8 K\n",
"10 | out_act                  | Tanhshrink      | 0     \n",
"--------------------------------------------------------------\n",
"543 K     Trainable params\n",
"0         Non-trainable params\n",
"543 K     Total params\n",
"2.174     Total estimated model params size (MB)\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "25d7ba2f5e3c4f68a55fdafed5a5b092",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Training: 0it [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Finally, train the model\n",
"trainer.fit(model, train, valid)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
101
expt3.py
Normal file
@@ -0,0 +1,101 @@
# From expt2 selected trials ???
# Data handling imports
from dask.distributed import Client, LocalCluster
import dask.array as da

# Deep learning imports
import torch
from torch.utils.data import DataLoader
from torch import nn
from torch import optim
import pytorch_lightning as pl
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
from pytorch_lightning.loggers import WandbLogger

from expt1 import (
    Model,
    device,
    X_train,
    y_train,
    X_val,
    y_val,
    collate_fn,
)
from custom_activations import SoftExp, PBessel

# Suppress some warning messages from pytorch_lightning,
# It really doesn't like that I've forced it to handle a dask array!
import warnings
import logging

warnings.filterwarnings("ignore", category=UserWarning, module=pl.__name__)
# Also, set up a log to record debug messages for failed trials
logging.basicConfig(filename="debug.log", encoding="utf-8", level=logging.ERROR)

if __name__ == "__main__":
    cluster = LocalCluster(n_workers=8, threads_per_worker=1)
    client = Client(cluster)


# Prepare datasets
train = DataLoader(
    list(zip(X_train.values(), y_train.values())),
    collate_fn=collate_fn,
    shuffle=True,
)
valid = DataLoader(
    list(zip(X_val.values(), y_val.values())),
    shuffle=True,
    collate_fn=collate_fn,
)

# Set up the model architecture and other necessary components
model = Model(
    # Training parameters
    optimizer=optim.Adam,
    # Model parameters
    compressor_kernel_size=128,
    compressor_chunk_size=128,
    compressor_act=(SoftExp, (), {}),
    conv_kernel_size=128,
    conv_act=(nn.Tanh, (), {}),
    conv_norm=False,
    channel_combine_act=(nn.Softplus, (), {}),
    param_ff_depth=2,
    param_ff_width=16,
    param_ff_act=(PBessel, (), {}),
    ff_width=1024,
    ff_depth=6,
    ff_act=(nn.Softplus, (), {}),
    out_size=2,
    out_act=(nn.Sigmoid, tuple(), dict()),
).to(device)

if __name__ == "__main__":
    early_stop_callback = EarlyStopping(
        monitor="val_loss", patience=15, verbose=False, mode="min"
    )

    checkpoint_callback = ModelCheckpoint(
        monitor="val_loss",
        dirpath="./checkpoints",
        filename="checkpoint-{epoch:02d}-{val_loss:.2f}",
        save_top_k=10,
        mode="min",
    )

    logger = WandbLogger(project="Aconity_ML_Expt1", name="Test 3")
    logger.experiment.watch(model, log="all", log_freq=1)

    trainer = Trainer(
        accelerator="gpu",
        max_epochs=-1,
        devices="auto",
        strategy="auto",
        logger=logger,
        callbacks=[checkpoint_callback, early_stop_callback],
        num_sanity_val_steps=0,  # Disabled or we get error because X is dask array
    )
    # Finally, train the model
    trainer.fit(model, train, valid)
1229
expt3_analysis.ipynb
Normal file
File diff suppressed because it is too large
1382
process_X_data.ipynb
Normal file
File diff suppressed because one or more lines are too long
213
process_y_data.ipynb
Normal file
@@ -0,0 +1,213 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import pickle"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [],
"source": [
"# Read the excel file\n",
"doe_df = pd.read_excel(\n",
"    \"data/NiTi_Cubes_Analysis.xlsx\",\n",
"    sheet_name=\"DOE & RSPNS\",\n",
"    header=1,\n",
"    usecols=\"A:M, T:AC\",\n",
"    nrows=81,\n",
")\n",
"# Remove newlines from column names\n",
"doe_df.rename(\n",
"    mapper=dict(zip(doe_df.keys(), (k.replace(\"\\n\", \" \") for k in doe_df.keys()))),\n",
"    axis=1,\n",
"    inplace=True,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"doe_df"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
"# Split the dataframe into a dictionary of dataframes, one for each sample\n",
"sample_y = dict(iter(doe_df.groupby(\"Sample\")))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sample_y[1]"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [],
"source": [
"# Finally, pickle this data for use in experiments\n",
"with open(\"sample_y.pkl\", \"wb\") as f:\n",
"    pickle.dump(sample_y, f)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
"    .dataframe tbody tr th:only-of-type {\n",
"        vertical-align: middle;\n",
"    }\n",
"\n",
"    .dataframe tbody tr th {\n",
"        vertical-align: top;\n",
"    }\n",
"\n",
"    .dataframe thead th {\n",
"        text-align: right;\n",
"    }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
"  <thead>\n",
"    <tr style=\"text-align: right;\">\n",
"      <th></th>\n",
"      <th>Sample</th>\n",
"      <th>Laser power, P\\n(W)</th>\n",
"      <th>Scan speed, V\\n(mm/sec)</th>\n",
"      <th>Spot size, F\\n(µm)</th>\n",
"      <th>Hatch spacing, H\\n(µm)</th>\n",
"      <th>Surface Energy Density @ 90µm Layer thickness, El (J/mm2)</th>\n",
"      <th>Surface Energy Density @ Spot size, EF (J/mm2)</th>\n",
"      <th>Vol. Energy Density @ Hatch Spacing, VEDH (J/mm3)</th>\n",
"      <th>Vol. Energy Density @ Spot Size, VEDF (J/mm3)</th>\n",
"      <th>Density\\n(Archimedes by Acetone)</th>\n",
"      <th>...</th>\n",
"      <th>Ni</th>\n",
"      <th>Ti</th>\n",
"      <th>Oxygen</th>\n",
"      <th>Carbon</th>\n",
"      <th>Ni (Norm)</th>\n",
"      <th>Ti (Norm)</th>\n",
"      <th>Sa (um)</th>\n",
"      <th>Sku</th>\n",
"      <th>Ssk</th>\n",
"      <th>Sz (um)</th>\n",
"    </tr>\n",
"  </thead>\n",
"  <tbody>\n",
"    <tr>\n",
"      <th>0</th>\n",
"      <td>1</td>\n",
"      <td>180</td>\n",
"      <td>1000</td>\n",
"      <td>40</td>\n",
"      <td>40</td>\n",
"      <td>2.0</td>\n",
"      <td>4.5</td>\n",
"      <td>50.0</td>\n",
"      <td>50.0</td>\n",
"      <td>6.343695</td>\n",
"      <td>...</td>\n",
"      <td>41.33</td>\n",
"      <td>43.76</td>\n",
"      <td>1.1</td>\n",
"      <td>13.81</td>\n",
"      <td>48.5721</td>\n",
"      <td>51.4279</td>\n",
"      <td>18.686</td>\n",
"      <td>3.243</td>\n",
"      <td>0.28</td>\n",
"      <td>187.116</td>\n",
"    </tr>\n",
"  </tbody>\n",
"</table>\n",
"<p>1 rows × 23 columns</p>\n",
"</div>"
],
"text/plain": [
"   Sample  Laser power, P\\n(W)  Scan speed, V\\n(mm/sec)  Spot size, F\\n(µm)  \\\n",
"0       1                  180                     1000                  40   \n",
"\n",
"   Hatch spacing, H\\n(µm)  \\\n",
"0                      40   \n",
"\n",
"   Surface Energy Density @ 90µm Layer thickness, El (J/mm2)  \\\n",
"0                                                2.0           \n",
"\n",
"   Surface Energy Density @ Spot size, EF (J/mm2)  \\\n",
"0                                             4.5   \n",
"\n",
"   Vol. Energy Density @ Hatch Spacing, VEDH (J/mm3)  \\\n",
"0                                               50.0   \n",
"\n",
"   Vol. Energy Density @ Spot Size, VEDF (J/mm3)  \\\n",
"0                                           50.0   \n",
"\n",
"   Density\\n(Archimedes by Acetone)  ...     Ni     Ti  Oxygen  Carbon  \\\n",
"0                          6.343695  ...  41.33  43.76     1.1   13.81   \n",
"\n",
"   Ni (Norm)  Ti (Norm)  Sa (um)    Sku   Ssk  Sz (um)  \n",
"0    48.5721    51.4279   18.686  3.243  0.28  187.116  \n",
"\n",
"[1 rows x 23 columns]"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sample_y[1]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
53
pyproject.toml
Normal file
@@ -0,0 +1,53 @@
[tool.poetry]
name = "aconity-ml-test"
version = "0.1.0"
description = ""
authors = ["Cian Hughes <cian.hughes@dcu.ie>"]
readme = "README.md"
# packages = [{include = "aconity_ml_test"}]

[tool.poetry.dependencies]
python = ">=3.9,<3.12"
read_layers = { file = "../MTPy/wheels/read_layers-0.1.0-cp311-cp311-manylinux_2_34_x86_64.whl" }
dask = { extras = ["distributed", "graphviz"], version = "*" }
pytorch-lightning = "^2.0.6"
xgboost = "^1.7.6"
optuna = "^3.2.0"
wandb = "^0.15.7"
numba = "^0.57.1"
tqdm = "^4.65.0"
matplotlib = "^3.7.2"
plotly = "^5.15.0"
bokeh = "^3.2.1"
holoviews = "^1.17.0"
datashader = "^0.15.1"
psutil = "^5.9.5"
pandas = "^2.0.3"
tables = "^3.8.0"
lz4 = "^4.3.2"
openpyxl = "^3.1.2"
odfpy = "^1.4.1"
fsspec = "^2023.6.0"
jupyterlab = "^4.0.3"
jupyter = "^1.0.0"
ipywidgets = "^8.0.7"
pyarrow = "^12.0.1"
jupyter-bokeh = "^3.0.7"
torch = { file = "./wheel/torch-2.0.1+cu118-cp311-cp311-linux_x86_64.whl" }
optuna-dashboard = "^0.10.3"
pymysql = "^1.1.0"
mysqlclient = "^2.2.0"
tabulate = "^0.9.0"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

[[tool.poetry.source]]
name = "PyPI"
priority = "primary"

# [[tool.poetry.source]]
# name = "nvidia"
# url = "https://pypi.ngc.nvidia.com"
# priority = "primary"