diff --git a/poetry.lock b/poetry.lock
index 289607b..b496e27 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -2321,6 +2321,24 @@ dev = ["black", "flake8", "isort", "pre-commit"]
 doc = ["myst-parser", "sphinx", "sphinx-book-theme"]
 test = ["coverage", "pytest", "pytest-cov"]
 
+[[package]]
+name = "loguru"
+version = "0.7.2"
+description = "Python logging made (stupidly) simple"
+optional = false
+python-versions = ">=3.5"
+files = [
+    {file = "loguru-0.7.2-py3-none-any.whl", hash = "sha256:003d71e3d3ed35f0f8984898359d65b79e5b21943f78af86aa5491210429b8eb"},
+    {file = "loguru-0.7.2.tar.gz", hash = "sha256:e671a53522515f34fd406340ee968cb9ecafbc4b36c679da03c18fd8d0bd51ac"},
+]
+
+[package.dependencies]
+colorama = {version = ">=0.3.4", markers = "sys_platform == \"win32\""}
+win32-setctime = {version = ">=1.0.0", markers = "sys_platform == \"win32\""}
+
+[package.extras]
+dev = ["Sphinx (==7.2.5)", "colorama (==0.4.5)", "colorama (==0.4.6)", "exceptiongroup (==1.1.3)", "freezegun (==1.1.0)", "freezegun (==1.2.2)", "mypy (==v0.910)", "mypy (==v0.971)", "mypy (==v1.4.1)", "mypy (==v1.5.1)", "pre-commit (==3.4.0)", "pytest (==6.1.2)", "pytest (==7.4.0)", "pytest-cov (==2.12.1)", "pytest-cov (==4.1.0)", "pytest-mypy-plugins (==1.9.3)", "pytest-mypy-plugins (==3.0.0)", "sphinx-autobuild (==2021.3.14)", "sphinx-rtd-theme (==1.3.0)", "tox (==3.27.1)", "tox (==4.11.0)"]
+
 [[package]]
 name = "mako"
 version = "1.3.5"
@@ -5341,6 +5359,20 @@ files = [
     {file = "widgetsnbextension-4.0.10.tar.gz", hash = "sha256:64196c5ff3b9a9183a8e699a4227fb0b7002f252c814098e66c4d1cd0644688f"},
 ]
 
+[[package]]
+name = "win32-setctime"
+version = "1.1.0"
+description = "A small Python utility to set file creation time on Windows"
+optional = false
+python-versions = ">=3.5"
+files = [
+    {file = "win32_setctime-1.1.0-py3-none-any.whl", hash = "sha256:231db239e959c2fe7eb1d7dc129f11172354f98361c4fa2d6d2d7e278baa8aad"},
+    {file = "win32_setctime-1.1.0.tar.gz", hash = "sha256:15cf5750465118d6929ae4de4eb46e8edae9a5634350c01ba582df868e932cb2"},
+]
+
+[package.extras]
+dev = ["black (>=19.3b0)", "pytest (>=4.6.2)"]
+
 [[package]]
 name = "xyzservices"
 version = "2024.4.0"
@@ -5458,4 +5490,4 @@ multidict = ">=4.0"
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.11"
-content-hash = "f607472660b04b7f6f5d49a4561730f788a46f0d1e0176322e872111b00481cd"
+content-hash = "d8cc3168211c9f7eaddf78e15b3077aadb6cda3358dfacfa83da732af83aa899"
diff --git a/pyproject.toml b/pyproject.toml
index 1627157..d77e2ae 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -30,6 +30,7 @@ safetensors = "^0.4.3"
 alive-progress = "^3.1.5"
 hvplot = "^0.10.0"
 pyarrow = "^16.1.0"
+loguru = "^0.7.2"
 
 
 [build-system]
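A note on the new runtime dependency: loguru ships a preconfigured `logger` object, which is why the changes below can swap bare `print(...)` calls for `logger.info(...)` with no handler setup. A minimal sketch of the pattern (the file sink and its path are illustrative assumptions, not part of this patch):

```python
from loguru import logger

# loguru needs no logging.basicConfig-style setup; the default sink is stderr,
# with timestamps, levels, and the call site included automatically.
logger.info("Dataset loaded!")

# Optionally mirror records to a file; "experiment.log" is a hypothetical path.
logger.add("experiment.log", level="INFO")
logger.info("Running Experiment {}...", 1)
```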
diff --git a/symbolic_nn_tests/__main__.py b/symbolic_nn_tests/__main__.py
index 3a59f26..7ba6365 100644
--- a/symbolic_nn_tests/__main__.py
+++ b/symbolic_nn_tests/__main__.py
@@ -1,14 +1,15 @@
 import typer
 from typing import Optional, Iterable
 from typing_extensions import Annotated
-from . import experiment1
+from loguru import logger
+from . import experiment1, experiment2
 
 
-EXPERIMENTS = (experiment1,)
+EXPERIMENTS = (experiment1, experiment2)
 
 
 def parse_int_or_intiterable(i: Optional[str]) -> Iterable[int]:
-    return range(1, len(EXPERIMENTS) + 1) if i is None else map(int, i.split(","))
+    return range(1, len(EXPERIMENTS) + 1) if i is None else list(map(int, i.split(",")))
 
 
 def main(
@@ -26,10 +27,10 @@ def main(
         bool, typer.Option(help="Whether or not to log via Weights & Biases")
     ] = True,
 ):
-    experiment_indeces = (i - 1 for i in experiments)
-    experiment_funcs = [EXPERIMENTS[i].run for i in experiment_indeces]
-
-    for experiment in experiment_funcs:
+    for i, n in enumerate(experiments, start=1):
+        j = n - 1
+        experiment = EXPERIMENTS[j].run
+        logger.info(f"Running Experiment {n} ({i}/{len(experiments)})...")
         experiment(tensorboard=tensorboard, wandb=wandb)
 
 
diff --git a/symbolic_nn_tests/experiment2/__init__.py b/symbolic_nn_tests/experiment2/__init__.py
index e845b79..906af50 100644
--- a/symbolic_nn_tests/experiment2/__init__.py
+++ b/symbolic_nn_tests/experiment2/__init__.py
@@ -27,49 +27,25 @@ def test(loss_func, version, tensorboard=True, wandb=True):
         )
         logger.append(wandb_logger)
 
-    test_model(logger=logger, loss_func=loss_func, lr=LEARNING_RATE)
+    test_model(logger=logger, loss_func=loss_func)
 
     if wandb:
         _wandb.finish()
 
 
 def run(tensorboard: bool = True, wandb: bool = True):
+    from .model import unpacking_mse_loss
     from . import semantic_loss
-    from torch import nn
 
     test(
-        nn.functional.cross_entropy,
-        "cross_entropy",
+        unpacking_mse_loss,
+        "mse_loss",
         tensorboard=tensorboard,
         wandb=wandb,
     )
     test(
-        semantic_loss.similarity_cross_entropy,
-        "similarity_cross_entropy",
-        tensorboard=tensorboard,
-        wandb=wandb,
-    )
-    test(
-        semantic_loss.hasline_cross_entropy,
-        "hasline_cross_entropy",
-        tensorboard=tensorboard,
-        wandb=wandb,
-    )
-    test(
-        semantic_loss.hasloop_cross_entropy,
-        "hasloop_cross_entropy",
-        tensorboard=tensorboard,
-        wandb=wandb,
-    )
-    test(
-        semantic_loss.multisemantic_cross_entropy,
-        "multisemantic_cross_entropy",
-        tensorboard=tensorboard,
-        wandb=wandb,
-    )
-    test(
-        semantic_loss.garbage_cross_entropy,
-        "garbage_cross_entropy",
+        semantic_loss.positive_slope_linear_loss,
+        "positive_slope_linear_loss",
         tensorboard=tensorboard,
         wandb=wandb,
     )
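Two behavioural notes on the driver rewrite above: `parse_int_or_intiterable` now materialises the parsed selection with `list(...)` because the loop calls `len(experiments)`, which a lazy `map` object cannot answer, and the loop tracks both the user-facing experiment number `n` and a 1-based progress counter `i`. A self-contained sketch of the same loop, with stubbed-out experiment modules (the stubs are assumptions for illustration only):

```python
from loguru import logger


class StubExperiment:
    """Hypothetical stand-in for the experiment1/experiment2 modules."""

    def __init__(self, name):
        self.name = name

    def run(self, tensorboard=True, wandb=True):
        logger.info("{} ran (tensorboard={}, wandb={})", self.name, tensorboard, wandb)


EXPERIMENTS = (StubExperiment("experiment1"), StubExperiment("experiment2"))
experiments = list(map(int, "2,1".split(",")))  # as parsed from --experiments "2,1"

for i, n in enumerate(experiments, start=1):
    experiment = EXPERIMENTS[n - 1].run
    logger.info(f"Running Experiment {n} ({i}/{len(experiments)})...")
    experiment(tensorboard=False, wandb=False)
```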
diff --git a/symbolic_nn_tests/experiment2/dataset.py b/symbolic_nn_tests/experiment2/dataset.py
index 4b91a12..723e360 100644
--- a/symbolic_nn_tests/experiment2/dataset.py
+++ b/symbolic_nn_tests/experiment2/dataset.py
@@ -11,6 +11,7 @@ from multiprocessing import Pool
 from symbolic_nn_tests.dataloader import DATASET_DIR
 import warnings
 from tqdm.auto import tqdm
+from loguru import logger
 
 warnings.filterwarnings(action="ignore", category=UserWarning)
 
@@ -47,58 +48,58 @@ def get_dataset():
     ):
         construct_dataset("pubchem")
     else:
-        print("Pre-existing dataset detected!")
-    print("Dataset loaded!")
+        logger.info("Pre-existing dataset detected!")
+    logger.info("Dataset loaded!")
     return TensorDataset(*load_dataset("pubchem"))
 
 
 def construct_dataset(filename):
-    print("Constructing dataset...")
+    logger.info("Constructing dataset...")
     df = construct_ds_dataframe(filename)
     save_dataframe_to_dataset(df, PUBCHEM_DIR / f"{filename}.pickle")
-    print("Dataset constructed!")
+    logger.info("Dataset constructed!")
 
 
 def construct_ds_dataframe(filename):
-    print("Constructing dataset dataframe...")
+    logger.info("Constructing dataset dataframe...")
     df = add_molecule_encodings(construct_raw_dataset(filename))
     # NOTE: This kind of checkpointing will be used throughout the construction process. It doesn't
     # take much disk space, it lets the GC collect out-of-scope data from the construction process,
     # and it makes it easier to debug if construction fails.
     parquet_file = PUBCHEM_DIR / f"{filename}.parquet"
     df.write_parquet(parquet_file)
-    print("Dataset dataframe constructed!")
+    logger.info("Dataset dataframe constructed!")
     return pl.read_parquet(parquet_file)
 
 
 def construct_raw_dataset(filename):
-    print("Constructing raw dataset...")
+    logger.info("Constructing raw dataset...")
     df = collate_dataset()
     parquet_file = PUBCHEM_DIR / f"{filename}_raw.parquet"
     df.write_parquet(parquet_file)
-    print("Raw dataset constructed!")
+    logger.info("Raw dataset constructed!")
     return pl.read_parquet(parquet_file)
 
 
 def collate_dataset():
-    print("Collating dataset...")
+    logger.info("Collating dataset...")
     if not (PUBCHEM_DIR.exists() and len(tuple(PUBCHEM_DIR.glob("*.json")))):
         fetch_dataset()
     df = pl.concat(
         map(pl.read_json, PUBCHEM_DIR.glob("*.json")),
     ).drop("id")
-    print("dataset collated!")
+    logger.info("Dataset collated!")
     return df
 
 
 def fetch_dataset():
-    print("Fetching dataset...")
+    logger.info("Fetching dataset...")
     kaggle.api.dataset_download_files(
         "burakhmmtgl/predict-molecular-properties", quiet=False, path=DATASET_DIR
     )
     shutil.unpack_archive(DATASET_DIR / "predict-molecular-properties.zip", PUBCHEM_DIR)
-    print("Dataset fetched!")
+    logger.info("Dataset fetched!")
 
 
 @lru_cache(maxsize=1)
@@ -172,7 +173,7 @@ def encode_orbital(orbital):
 
 
 def save_dataframe_to_dataset(df, filename):
-    print("Saving dataset to tensors...")
+    logger.info("Saving dataset to tensors...")
     with (filename.parent / f"{filename.stem}_x0{filename.suffix}").open("wb") as f:
         pickle.dump(properties_to_tensor(df).float(), f)
     with (filename.parent / f"{filename.stem}_x1{filename.suffix}").open("wb") as f:
@@ -180,7 +181,7 @@ def save_dataframe_to_dataset(df, filename):
     with (filename.parent / f"{filename.stem}_y{filename.suffix}").open("wb") as f:
         pickle.dump(df["En"].to_torch().float(), f)
     del df
-    print("Tensors saved!")
+    logger.info("Tensors saved!")
 
 
 def chunked_df(df, n):
diff --git a/symbolic_nn_tests/experiment2/math.py b/symbolic_nn_tests/experiment2/math.py
new file mode 100644
index 0000000..2b2af30
--- /dev/null
+++ b/symbolic_nn_tests/experiment2/math.py
@@ -0,0 +1,23 @@
+import torch
+
+
+def sech(x):
+    return torch.reciprocal(torch.cosh(x))
+
+
+def linear_fit(x, y):
+    mean_x = torch.mean(x)
+    mean_y = torch.mean(y)
+    cov_xy = torch.mean(x * y) - (mean_x * mean_y)
+    var_x = torch.mean(x * x) - (mean_x * mean_x)
+    m = cov_xy / var_x
+    c = mean_y - (m * mean_x)
+    return m, c
+
+
+def line(x, m, c):
+    return (m * x) + c
+
+
+def linear_residuals(x, y, m, c):
+    return y - line(x, m, c)
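The helpers split out into math.py compute an ordinary least-squares fit from the moment identities m = cov(x, y) / var(x) and c = mean(y) - m * mean(x). A quick self-check on exact data (arbitrary values, not from the patch): fitting points generated by a known line should recover its slope and intercept and leave zero residuals.

```python
import torch

from symbolic_nn_tests.experiment2.math import line, linear_fit, linear_residuals

# Points drawn exactly from y = 3x + 2, so the closed-form fit should
# recover m = 3, c = 2 and the residuals should (numerically) vanish.
x = torch.tensor([0.0, 1.0, 2.0, 3.0, 4.0])
y = line(x, 3.0, 2.0)

m, c = linear_fit(x, y)
assert torch.isclose(m, torch.tensor(3.0))
assert torch.isclose(c, torch.tensor(2.0))
assert torch.allclose(linear_residuals(x, y, m, c), torch.zeros_like(x), atol=1e-5)
```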
diff --git a/symbolic_nn_tests/experiment2/model.py b/symbolic_nn_tests/experiment2/model.py
index c86bf76..e7b53de 100644
--- a/symbolic_nn_tests/experiment2/model.py
+++ b/symbolic_nn_tests/experiment2/model.py
@@ -14,9 +14,7 @@ class Model(nn.Module):
         self.encode_x0 = self.create_xval_encoding_fn(self.x0_encoder)
         self.encode_x1 = self.create_xval_encoding_fn(self.x1_encoder)
         self.ff = nn.Sequential(
-            nn.Linear(17, 256),
-            nn.ReLU(),
-            nn.Linear(256, 128),
+            nn.Linear(17, 128),
             nn.ReLU(),
             nn.Linear(128, 64),
             nn.ReLU(),
@@ -59,95 +57,12 @@ def get_singleton_dataset():
     )
 
 
-def smooth_l1_loss(out, y):
+def unpacking_mse_loss(out, y):
     _, y_pred = out
-    return nn.functional.smooth_l1_loss(y_pred, y)
+    return nn.functional.mse_loss(y_pred, y)
 
 
-def sech(x):
-    return torch.reciprocal(torch.cosh(x))
-
-
-def linear_fit(x, y):
-    mean_x = torch.mean(x)
-    mean_y = torch.mean(y)
-    cov_xy = torch.mean(x * y) - (mean_x * mean_y)
-    var_x = torch.mean(x * x) - (mean_x * mean_x)
-    m = cov_xy / var_x
-    c = mean_y - (m * mean_x)
-    return m, c
-
-
-def line(x, m, c):
-    return (m * x) + c
-
-
-def linear_residuals(x, y, m, c):
-    return y - line(x, m, c)
-
-
-def semantic_loss(x, y_pred, w, a):
-    m, c = linear_fit(x, y_pred)
-    residuals = linear_residuals(x, y_pred, m, c)
-    scaled_residuals = residuals * sech(w * x)
-    slope_penalty = torch.nn.functional.softmax(a * m, dim=0)
-    loss = torch.mean(scaled_residuals**2) + torch.mean(slope_penalty)
-    return loss
-
-
-def loss(out, y):
-    x, y_pred = out
-    x0, x1 = x
-
-    # Here, we want to make semantic use of the differential electronegativity of the molecule
-    # so start by calculating that
-    mean_electronegativities = torch.tensor(
-        [i[:, 3].mean() for i in x0], dtype=torch.float32
-    ).to(y_pred.device)
-    diff_electronegativity = (
-        torch.tensor(
-            [
-                (i[:, 3] - mean).abs().sum()
-                for i, mean in zip(x0, mean_electronegativities)
-            ],
-            dtype=torch.float32,
-        )
-        * 4.0
-    ).to(y_pred.device)
-
-    # Then, we need to get a linear best fit on that. Our semantic info is based on a graph of
-    # En (y) vs differential electronegativity on the x vs y axes, so y_pred is y here
-    m, c = linear_fit(diff_electronegativity, y_pred)
-
-    # To start with, we want to calculate a penalty based on deviation from a linear relationship
-    # Scaling is being based on 1/sech(w*r) as this increases multiplier as deviation grows.
-    # `w` was selected based on noting that the residual spread before eneg scaling was about 25;
-    # enegs were normalised as x/4, so we want to incentivize a spread of about 25/4~=6, and w=0.2
-    # causes the penalty function to cross 2 at just over 6. Yes, that's a bit arbitrary but we're
-    # just steering the model not applying hard constraints to it shold be fine.
-    residual_penalty = (
-        (
-            linear_residuals(diff_electronegativity, y_pred, m, c)
-            / sech(0.2 * diff_electronegativity)
-        )
-        .abs()
-        .float()
-        .mean()
-    )
-
-    # We also need to calculate a penalty that incentivizes a positive slope. For this, im using softmax
-    # to scale the slope as it will penalise negative slopes while not just creating a reward hack for
-    # maximizing slope. The softmax function approximates 1 from about 5 onwards, so if we multiply m by
-    # 500, then our penalty should be almost minimised for any slope above 0.01 and maximised below 0.01.
-    # This should suffice for incentivizing the model to favour positive slopes.
-    slope_penalty = (torch.nn.functional.softmax(-m * 500.0) + 1).mean()
-
-    # Finally, let's get a smooth L1 loss and scale it based on these penalty functions
-    return nn.functional.smooth_l1_loss(y_pred, y) * residual_penalty * slope_penalty
-
-
-# def main(loss_func=smooth_l1_loss, logger=None, **kwargs):
-def main(loss_func=loss, logger=None, **kwargs):
+def main(loss_func=unpacking_mse_loss, logger=None, **kwargs):
     import lightning as L
 
     from symbolic_nn_tests.train import TrainingWrapper
@@ -160,7 +75,7 @@ def main(loss_func=loss, logger=None, **kwargs):
     train, val, test = get_singleton_dataset()
     lmodel = TrainingWrapper(Model(), loss_func=loss_func)
     lmodel.configure_optimizers(optimizer=torch.optim.NAdam, **kwargs)
-    trainer = L.Trainer(max_epochs=10, logger=logger, num_sanity_val_steps=0)
+    trainer = L.Trainer(max_epochs=10, logger=logger)
    trainer.fit(model=lmodel, train_dataloaders=train, val_dataloaders=val)
     trainer.test(dataloaders=test)
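The losses in this experiment all share one convention made explicit by `unpacking_mse_loss`: the model's forward pass returns an `(inputs, prediction)` pair so that semantic losses can inspect the inputs, while plain losses simply discard them. A toy demonstration of the convention (the batch size and feature shapes here are assumptions):

```python
import torch
from torch import nn


def unpacking_mse_loss(out, y):
    _, y_pred = out  # drop the inputs; only the prediction is scored
    return nn.functional.mse_loss(y_pred, y)


# Fake a model output: some inputs (ignored here) plus an 8-element prediction.
fake_inputs = (torch.randn(8, 17), torch.randn(8, 5))
y_pred = torch.randn(8)
y = torch.randn(8)

loss = unpacking_mse_loss((fake_inputs, y_pred), y)
print(loss)  # 0-dim tensor: the mean squared error over the batch
```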
diff --git a/symbolic_nn_tests/experiment2/semantic_loss.py b/symbolic_nn_tests/experiment2/semantic_loss.py
index f52842b..0294990 100644
--- a/symbolic_nn_tests/experiment2/semantic_loss.py
+++ b/symbolic_nn_tests/experiment2/semantic_loss.py
@@ -1,3 +1,5 @@
+from symbolic_nn_tests.experiment2.math import linear_fit, linear_residuals, sech
+from torch import nn
 import torch
 
 
@@ -14,3 +16,54 @@ import torch
 # without creating a reward hack for maximizing/minimizing m and preventing exploding gradients.
 # It also allows us to avoid the assumption of linearity: we only care about the direction of
 # proportionality.
+
+
+def positive_slope_linear_loss(out, y):
+    x, y_pred = out
+    x0, x1 = x
+
+    # Here, we want to make semantic use of the differential electronegativity of the molecule,
+    # so start by calculating that.
+    mean_electronegativities = torch.tensor(
+        [i[:, 3].mean() for i in x0], dtype=torch.float32
+    ).to(y_pred.device)
+    diff_electronegativity = (
+        torch.tensor(
+            [
+                (i[:, 3] - mean).abs().sum()
+                for i, mean in zip(x0, mean_electronegativities)
+            ],
+            dtype=torch.float32,
+        )
+        * 4.0
+    ).to(y_pred.device)
+
+    # Then, we need a linear best fit on that. Our semantic info is based on a graph of
+    # En (y axis) vs differential electronegativity (x axis), so y_pred is the y here.
+    m, c = linear_fit(diff_electronegativity, y_pred)
+
+    # To start with, we want to calculate a penalty based on deviation from a linear relationship.
+    # Scaling is based on 1/sech(w*r), as this increases the multiplier as the deviation grows.
+    # `w` was selected based on noting that the residual spread before eneg scaling was about 25;
+    # enegs were normalised as x/4, so we want to incentivize a spread of about 25/4~=6, and w=0.2
+    # causes the penalty function to cross 2 at just over 6. Yes, that's a bit arbitrary, but we're
+    # just steering the model, not applying hard constraints, so it should be fine.
+    residual_penalty = (
+        (
+            linear_residuals(diff_electronegativity, y_pred, m, c)
+            / sech(0.2 * diff_electronegativity)
+        )
+        .abs()
+        .float()
+        .mean()
+    )
+
+    # We also need a penalty that incentivizes a positive slope. For this, I'm using a sigmoid
+    # to scale the slope, as it penalises negative slopes without creating a reward hack for
+    # maximizing slope. The sigmoid approaches 1 from about 5 onwards, so if we multiply m by
+    # 500, the penalty is almost minimised for any slope above 0.01 and maximised below -0.01.
+    # This should suffice for incentivizing the model to favour positive slopes.
+    slope_penalty = (torch.sigmoid(-m * 500.0) + 1).mean()
+
+    # Finally, let's take the MSE loss and scale it by these penalty functions.
+    return nn.functional.mse_loss(y_pred, y) * residual_penalty * slope_penalty
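For intuition on the slope penalty's shape: `linear_fit` returns a scalar slope, and `sigmoid(-500·m)` sweeps from ~1 (clearly negative slope) to ~0 (clearly positive slope), so the `+ 1` multiplier ranges over (1, 2) and is nearly flat outside |m| ≈ 0.01. A small sketch of that behaviour (not part of the patch):

```python
import torch


def slope_penalty(m):
    # sigmoid(-500m) is ~1 for m << -0.01 and ~0 for m >> 0.01, so the
    # multiplier decays smoothly from 2 (penalised) to 1 (neutral).
    return (torch.sigmoid(-m * 500.0) + 1).mean()


for m in (-0.05, -0.01, 0.0, 0.01, 0.05):
    print(f"m={m:+.2f} -> penalty={float(slope_penalty(torch.tensor(m))):.3f}")
# m=-0.05 -> ~2.000, m=0.00 -> 1.500, m=+0.05 -> ~1.000
```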