mirror of https://github.com/Cian-H/symbolic_nn_tests.git
Refactor of experiment2 for convenience
poetry.lock (generated): 34 changed lines
@@ -2321,6 +2321,24 @@ dev = ["black", "flake8", "isort", "pre-commit"]
doc = ["myst-parser", "sphinx", "sphinx-book-theme"]
test = ["coverage", "pytest", "pytest-cov"]

[[package]]
name = "loguru"
version = "0.7.2"
description = "Python logging made (stupidly) simple"
optional = false
python-versions = ">=3.5"
files = [
{file = "loguru-0.7.2-py3-none-any.whl", hash = "sha256:003d71e3d3ed35f0f8984898359d65b79e5b21943f78af86aa5491210429b8eb"},
{file = "loguru-0.7.2.tar.gz", hash = "sha256:e671a53522515f34fd406340ee968cb9ecafbc4b36c679da03c18fd8d0bd51ac"},
]

[package.dependencies]
colorama = {version = ">=0.3.4", markers = "sys_platform == \"win32\""}
win32-setctime = {version = ">=1.0.0", markers = "sys_platform == \"win32\""}

[package.extras]
dev = ["Sphinx (==7.2.5)", "colorama (==0.4.5)", "colorama (==0.4.6)", "exceptiongroup (==1.1.3)", "freezegun (==1.1.0)", "freezegun (==1.2.2)", "mypy (==v0.910)", "mypy (==v0.971)", "mypy (==v1.4.1)", "mypy (==v1.5.1)", "pre-commit (==3.4.0)", "pytest (==6.1.2)", "pytest (==7.4.0)", "pytest-cov (==2.12.1)", "pytest-cov (==4.1.0)", "pytest-mypy-plugins (==1.9.3)", "pytest-mypy-plugins (==3.0.0)", "sphinx-autobuild (==2021.3.14)", "sphinx-rtd-theme (==1.3.0)", "tox (==3.27.1)", "tox (==4.11.0)"]

[[package]]
name = "mako"
version = "1.3.5"
@@ -5341,6 +5359,20 @@ files = [
{file = "widgetsnbextension-4.0.10.tar.gz", hash = "sha256:64196c5ff3b9a9183a8e699a4227fb0b7002f252c814098e66c4d1cd0644688f"},
]

[[package]]
name = "win32-setctime"
version = "1.1.0"
description = "A small Python utility to set file creation time on Windows"
optional = false
python-versions = ">=3.5"
files = [
{file = "win32_setctime-1.1.0-py3-none-any.whl", hash = "sha256:231db239e959c2fe7eb1d7dc129f11172354f98361c4fa2d6d2d7e278baa8aad"},
{file = "win32_setctime-1.1.0.tar.gz", hash = "sha256:15cf5750465118d6929ae4de4eb46e8edae9a5634350c01ba582df868e932cb2"},
]

[package.extras]
dev = ["black (>=19.3b0)", "pytest (>=4.6.2)"]

[[package]]
name = "xyzservices"
version = "2024.4.0"
@@ -5458,4 +5490,4 @@ multidict = ">=4.0"
[metadata]
lock-version = "2.0"
python-versions = "^3.11"
content-hash = "f607472660b04b7f6f5d49a4561730f788a46f0d1e0176322e872111b00481cd"
content-hash = "d8cc3168211c9f7eaddf78e15b3077aadb6cda3358dfacfa83da732af83aa899"
@@ -30,6 +30,7 @@ safetensors = "^0.4.3"
alive-progress = "^3.1.5"
hvplot = "^0.10.0"
pyarrow = "^16.1.0"
loguru = "^0.7.2"


[build-system]
@@ -1,14 +1,15 @@
import typer
from typing import Optional, Iterable
from typing_extensions import Annotated
from . import experiment1
from loguru import logger
from . import experiment1, experiment2


EXPERIMENTS = (experiment1,)
EXPERIMENTS = (experiment1, experiment2)


def parse_int_or_intiterable(i: Optional[str]) -> Iterable[int]:
return range(1, len(EXPERIMENTS) + 1) if i is None else map(int, i.split(","))
return range(1, len(EXPERIMENTS) + 1) if i is None else list(map(int, i.split(",")))


def main(

@@ -26,10 +27,10 @@ def main(
bool, typer.Option(help="Whether or not to log via Weights & Biases")
] = True,
):
experiment_indeces = (i - 1 for i in experiments)
experiment_funcs = [EXPERIMENTS[i].run for i in experiment_indeces]

for experiment in experiment_funcs:
for i, n in enumerate(experiments, start=1):
j = n - 1
experiment = EXPERIMENTS[j].run
logger.info(f"Running Experiment {n} ({i}/{len(experiments)})...")
experiment(tensorboard=tensorboard, wandb=wandb)
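The list(map(...)) change in parse_int_or_intiterable matters because the parsed selection is both iterated and measured with len() when logging progress; a lazy map object supports neither reuse nor len(). A quick illustrative sketch (editor's example, not part of the commit):

# Why the parsed experiment selection should be a list rather than a lazy map object.
lazy = map(int, "1,2".split(","))
# len(lazy) raises TypeError, and a second pass over it yields nothing.
eager = list(map(int, "1,2".split(",")))
assert len(eager) == 2  # usable in messages like f"({i}/{len(experiments)})"
assert [n - 1 for n in eager] == [0, 1]  # 1-based CLI indices map to 0-based EXPERIMENTS indices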
@@ -27,49 +27,25 @@ def test(loss_func, version, tensorboard=True, wandb=True):
)
logger.append(wandb_logger)

test_model(logger=logger, loss_func=loss_func, lr=LEARNING_RATE)
test_model(logger=logger, loss_func=loss_func)

if wandb:
_wandb.finish()


def run(tensorboard: bool = True, wandb: bool = True):
from .model import unpacking_mse_loss
from . import semantic_loss
from torch import nn

test(
nn.functional.cross_entropy,
"cross_entropy",
unpacking_mse_loss,
"mse_loss",
tensorboard=tensorboard,
wandb=wandb,
)
test(
semantic_loss.similarity_cross_entropy,
"similarity_cross_entropy",
tensorboard=tensorboard,
wandb=wandb,
)
test(
semantic_loss.hasline_cross_entropy,
"hasline_cross_entropy",
tensorboard=tensorboard,
wandb=wandb,
)
test(
semantic_loss.hasloop_cross_entropy,
"hasloop_cross_entropy",
tensorboard=tensorboard,
wandb=wandb,
)
test(
semantic_loss.multisemantic_cross_entropy,
"multisemantic_cross_entropy",
tensorboard=tensorboard,
wandb=wandb,
)
test(
semantic_loss.garbage_cross_entropy,
"garbage_cross_entropy",
semantic_loss.positive_slope_linear_loss,
"positive_slope_linear_loss",
tensorboard=tensorboard,
wandb=wandb,
)
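The refactored run() pairs each loss callable with a version label for the loggers. The same dispatch can be read as data plus a loop; the following is only an illustrative rewrite by the editor, not the code in the commit:

# Illustration: run() as a loop over (loss function, version label) pairs.
# `test` is the function shown above; the pairs are the two calls made in the commit.
def run(tensorboard: bool = True, wandb: bool = True):
    from . import semantic_loss
    from .model import unpacking_mse_loss

    cases = (
        (unpacking_mse_loss, "mse_loss"),
        (semantic_loss.positive_slope_linear_loss, "positive_slope_linear_loss"),
    )
    for loss_func, version in cases:
        test(loss_func, version, tensorboard=tensorboard, wandb=wandb)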
@@ -11,6 +11,7 @@ from multiprocessing import Pool
from symbolic_nn_tests.dataloader import DATASET_DIR
import warnings
from tqdm.auto import tqdm
from loguru import logger


warnings.filterwarnings(action="ignore", category=UserWarning)

@@ -47,58 +48,58 @@ def get_dataset():
):
construct_dataset("pubchem")
else:
print("Pre-existing dataset detected!")
print("Dataset loaded!")
logger.info("Pre-existing dataset detected!")
logger.info("Dataset loaded!")
return TensorDataset(*load_dataset("pubchem"))


def construct_dataset(filename):
print("Constructing dataset...")
logger.info("Constructing dataset...")
df = construct_ds_dataframe(filename)
save_dataframe_to_dataset(df, PUBCHEM_DIR / f"{filename}.pickle")
print("Dataset constructed!")
logger.info("Dataset constructed!")


def construct_ds_dataframe(filename):
print("Constructing dataset dataframe...")
logger.info("Constructing dataset dataframe...")
df = add_molecule_encodings(construct_raw_dataset(filename))
# NOTE: This kind of checkpointing will be used throughout the construction process. It doesn't
# take much disk space, it lets the GC collect out-of-scope data from the construction process,
# and it makes it easier to debug if construction fails
parquet_file = PUBCHEM_DIR / f"{filename}.parquet"
df.write_parquet(parquet_file)
print("Dataset dataframe constructed!")
logger.info("Dataset dataframe constructed!")
return pl.read_parquet(parquet_file)


def construct_raw_dataset(filename):
print("Constructing raw dataset...")
logger.info("Constructing raw dataset...")
df = collate_dataset()
parquet_file = PUBCHEM_DIR / f"{filename}_raw.parquet"
df.write_parquet(parquet_file)
print("Raw dataset constructed!")
logger.info("Raw dataset constructed!")
return pl.read_parquet(parquet_file)


def collate_dataset():
print("Collating dataset...")
logger.info("Collating dataset...")
if not (PUBCHEM_DIR.exists() and len(tuple(PUBCHEM_DIR.glob("*.json")))):
fetch_dataset()

df = pl.concat(
map(pl.read_json, PUBCHEM_DIR.glob("*.json")),
).drop("id")
print("dataset collated!")
logger.info("dataset collated!")
return df


def fetch_dataset():
print("Fetching dataset...")
logger.info("Fetching dataset...")
kaggle.api.dataset_download_files(
"burakhmmtgl/predict-molecular-properties", quiet=False, path=DATASET_DIR
)
shutil.unpack_archive(DATASET_DIR / "predict-molecular-properties.zip", PUBCHEM_DIR)
print("Dataset fetched!")
logger.info("Dataset fetched!")


@lru_cache(maxsize=1)

@@ -172,7 +173,7 @@ def encode_orbital(orbital):


def save_dataframe_to_dataset(df, filename):
print("Saving dataset to tensors...")
logger.info("Saving dataset to tensors...")
with (filename.parent / f"{filename.stem}_x0{filename.suffix}").open("wb") as f:
pickle.dump(properties_to_tensor(df).float(), f)
with (filename.parent / f"{filename.stem}_x1{filename.suffix}").open("wb") as f:

@@ -180,7 +181,7 @@ def save_dataframe_to_dataset(df, filename):
with (filename.parent / f"{filename.stem}_y{filename.suffix}").open("wb") as f:
pickle.dump(df["En"].to_torch().float(), f)
del df
print("Tensors saved!")
logger.info("Tensors saved!")


def chunked_df(df, n):
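The print-to-logger.info swap above relies on loguru, which this commit adds as a dependency. One practical benefit is that the messages can be rerouted or duplicated without touching the call sites; a small sketch (the sink path, rotation, and format are the editor's illustrative choices, not from the repo):

# Sketch: routing the dataset-construction messages to a rotating file as well as stderr.
from loguru import logger

logger.add(
    "dataset_build.log",  # hypothetical file sink
    rotation="10 MB",  # start a new file once the log exceeds 10 MB
    level="INFO",
    format="{time:HH:mm:ss} | {level} | {message}",
)
logger.info("Constructing dataset...")  # now reaches both stderr and the file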
symbolic_nn_tests/experiment2/math.py (new file): 23 lines
@@ -0,0 +1,23 @@
import torch


def sech(x):
    return torch.reciprocal(torch.cosh(x))


def linear_fit(x, y):
    mean_x = torch.mean(x)
    mean_y = torch.mean(y)
    cov_xy = torch.mean(x * y) - (mean_x * mean_y)
    var_x = torch.mean(x * x) - (mean_x * mean_x)
    m = cov_xy / var_x
    c = mean_y - (m * mean_x)
    return m, c


def line(x, m, c):
    return (m * x) + c


def linear_residuals(x, y, m, c):
    return y - line(x, m, c)
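math.py factors the line-fitting helpers out of model.py. linear_fit is the closed-form least-squares fit, m = cov(x, y) / var(x) and c = mean(y) - m * mean(x); a quick sanity check on synthetic data (the data and tolerances are the editor's, not part of the commit):

# Sanity check: noise-free points from y = 2x + 1 should recover m ~ 2, c ~ 1
# and near-zero residuals.
import torch

from symbolic_nn_tests.experiment2.math import line, linear_fit, linear_residuals

x = torch.linspace(0.0, 10.0, steps=50)
y = 2.0 * x + 1.0
m, c = linear_fit(x, y)
assert torch.isclose(m, torch.tensor(2.0), atol=1e-3)
assert torch.isclose(c, torch.tensor(1.0), atol=1e-3)
assert torch.allclose(linear_residuals(x, y, m, c), torch.zeros_like(x), atol=1e-3)
assert torch.allclose(line(x, m, c), y, atol=1e-3)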
@@ -14,9 +14,7 @@ class Model(nn.Module):
self.encode_x0 = self.create_xval_encoding_fn(self.x0_encoder)
self.encode_x1 = self.create_xval_encoding_fn(self.x1_encoder)
self.ff = nn.Sequential(
nn.Linear(17, 256),
nn.ReLU(),
nn.Linear(256, 128),
nn.Linear(17, 128),
nn.ReLU(),
nn.Linear(128, 64),
nn.ReLU(),

@@ -59,95 +57,12 @@ def get_singleton_dataset():
)


def smooth_l1_loss(out, y):
def unpacking_mse_loss(out, y):
_, y_pred = out
return nn.functional.smooth_l1_loss(y_pred, y)
return nn.functional.mse_loss(y_pred, y)


def sech(x):
return torch.reciprocal(torch.cosh(x))


def linear_fit(x, y):
mean_x = torch.mean(x)
mean_y = torch.mean(y)
cov_xy = torch.mean(x * y) - (mean_x * mean_y)
var_x = torch.mean(x * x) - (mean_x * mean_x)
m = cov_xy / var_x
c = mean_y - (m * mean_x)
return m, c


def line(x, m, c):
return (m * x) + c


def linear_residuals(x, y, m, c):
return y - line(x, m, c)


def semantic_loss(x, y_pred, w, a):
m, c = linear_fit(x, y_pred)
residuals = linear_residuals(x, y_pred, m, c)
scaled_residuals = residuals * sech(w * x)
slope_penalty = torch.nn.functional.softmax(a * m, dim=0)
loss = torch.mean(scaled_residuals**2) + torch.mean(slope_penalty)
return loss


def loss(out, y):
x, y_pred = out
x0, x1 = x

# Here, we want to make semantic use of the differential electronegativity of the molecule
# so start by calculating that
mean_electronegativities = torch.tensor(
[i[:, 3].mean() for i in x0], dtype=torch.float32
).to(y_pred.device)
diff_electronegativity = (
torch.tensor(
[
(i[:, 3] - mean).abs().sum()
for i, mean in zip(x0, mean_electronegativities)
],
dtype=torch.float32,
)
* 4.0
).to(y_pred.device)

# Then, we need to get a linear best fit on that. Our semantic info is based on a graph of
# En (y) vs differential electronegativity on the x vs y axes, so y_pred is y here
m, c = linear_fit(diff_electronegativity, y_pred)

# To start with, we want to calculate a penalty based on deviation from a linear relationship
# Scaling is being based on 1/sech(w*r) as this increases multiplier as deviation grows.
# `w` was selected based on noting that the residual spread before eneg scaling was about 25;
# enegs were normalised as x/4, so we want to incentivize a spread of about 25/4~=6, and w=0.2
# causes the penalty function to cross 2 at just over 6. Yes, that's a bit arbitrary but we're
# just steering the model, not applying hard constraints to it, so it should be fine.
residual_penalty = (
(
linear_residuals(diff_electronegativity, y_pred, m, c)
/ sech(0.2 * diff_electronegativity)
)
.abs()
.float()
.mean()
)

# We also need to calculate a penalty that incentivizes a positive slope. For this, I'm using softmax
# to scale the slope as it will penalise negative slopes while not just creating a reward hack for
# maximizing slope. The softmax function approximates 1 from about 5 onwards, so if we multiply m by
# 500, then our penalty should be almost minimised for any slope above 0.01 and maximised below 0.01.
# This should suffice for incentivizing the model to favour positive slopes.
slope_penalty = (torch.nn.functional.softmax(-m * 500.0) + 1).mean()

# Finally, let's get a smooth L1 loss and scale it based on these penalty functions
return nn.functional.smooth_l1_loss(y_pred, y) * residual_penalty * slope_penalty


# def main(loss_func=smooth_l1_loss, logger=None, **kwargs):
def main(loss_func=loss, logger=None, **kwargs):
def main(loss_func=unpacking_mse_loss, logger=None, **kwargs):
import lightning as L

from symbolic_nn_tests.train import TrainingWrapper

@@ -160,7 +75,7 @@ def main(loss_func=loss, logger=None, **kwargs):
train, val, test = get_singleton_dataset()
lmodel = TrainingWrapper(Model(), loss_func=loss_func)
lmodel.configure_optimizers(optimizer=torch.optim.NAdam, **kwargs)
trainer = L.Trainer(max_epochs=10, logger=logger, num_sanity_val_steps=0)
trainer = L.Trainer(max_epochs=10, logger=logger)
trainer.fit(model=lmodel, train_dataloaders=train, val_dataloaders=val)
trainer.test(dataloaders=test)
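Note that unpacking_mse_loss (and the semantic losses) take the model output as an (x, y_pred) tuple rather than a bare prediction tensor. A minimal illustration with placeholder tensors (the shapes are the editor's arbitrary choices, not the repo's batch format):

# The (x, y_pred) output convention these loss functions unpack, on dummy tensors.
import torch
from torch import nn


def unpacking_mse_loss(out, y):  # as defined in the diff above
    _, y_pred = out
    return nn.functional.mse_loss(y_pred, y)


x = (torch.randn(8, 17), torch.randn(8, 4))  # stand-in for the (x0, x1) model inputs
y_pred = torch.randn(8)
y = torch.randn(8)
assert torch.isclose(unpacking_mse_loss((x, y_pred), y), nn.functional.mse_loss(y_pred, y))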
@@ -1,3 +1,5 @@
from symbolic_nn_tests.experiment2.math import linear_fit, linear_residuals, sech
from torch import nn
import torch


@@ -14,3 +16,54 @@ import torch
# without creating a reward hack for maximizing/minimizing m and preventing exploding gradients.
# It also allows us to avoid the assumption of linearity: we only care about the direction of
# proportionality.


def positive_slope_linear_loss(out, y):
x, y_pred = out
x0, x1 = x

# Here, we want to make semantic use of the differential electronegativity of the molecule
# so start by calculating that
mean_electronegativities = torch.tensor(
[i[:, 3].mean() for i in x0], dtype=torch.float32
).to(y_pred.device)
diff_electronegativity = (
torch.tensor(
[
(i[:, 3] - mean).abs().sum()
for i, mean in zip(x0, mean_electronegativities)
],
dtype=torch.float32,
)
* 4.0
).to(y_pred.device)

# Then, we need to get a linear best fit on that. Our semantic info is based on a graph of
# En (y) vs differential electronegativity on the x vs y axes, so y_pred is y here
m, c = linear_fit(diff_electronegativity, y_pred)

# To start with, we want to calculate a penalty based on deviation from a linear relationship
# Scaling is being based on 1/sech(w*r) as this increases multiplier as deviation grows.
# `w` was selected based on noting that the residual spread before eneg scaling was about 25;
# enegs were normalised as x/4, so we want to incentivize a spread of about 25/4~=6, and w=0.2
# causes the penalty function to cross 2 at just over 6. Yes, that's a bit arbitrary but we're
# just steering the model, not applying hard constraints to it, so it should be fine.
residual_penalty = (
(
linear_residuals(diff_electronegativity, y_pred, m, c)
/ sech(0.2 * diff_electronegativity)
)
.abs()
.float()
.mean()
)

# We also need to calculate a penalty that incentivizes a positive slope. For this, I'm using softmax
# to scale the slope as it will penalise negative slopes while not just creating a reward hack for
# maximizing slope. The softmax function approximates 1 from about 5 onwards, so if we multiply m by
# 500, then our penalty should be almost minimised for any slope above 0.01 and maximised below 0.01.
# This should suffice for incentivizing the model to favour positive slopes.
slope_penalty = (torch.nn.functional.softmax(-m * 500.0) + 1).mean()

# Finally, let's get an MSE loss and scale it based on these penalty functions
return nn.functional.mse_loss(y_pred, y) * residual_penalty * slope_penalty
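The residual penalty scales each residual by 1/sech(0.2 * x), so deviations at high differential electronegativity cost more, and the slope penalty targets the sign of the fitted slope m. A small numeric sketch of those two ingredients on toy values (the editor's numbers, not the repo's data):

# Toy illustration of the two ingredients of positive_slope_linear_loss.
import torch

from symbolic_nn_tests.experiment2.math import linear_fit, linear_residuals, sech

# 1/sech(0.2*x) = cosh(0.2*x) grows with x, so the same residual costs more at large x;
# it passes 2 just above x = 6.6, matching the comment about the choice of w = 0.2.
x = torch.tensor([0.0, 3.0, 6.0, 10.0])
print(1 / sech(0.2 * x))  # approximately tensor([1.0000, 1.1855, 1.8107, 3.7622])

# Predictions that trend downward against x give a negative fitted slope,
# which is exactly what the slope penalty term is meant to discourage.
y_pred = torch.tensor([5.0, 4.0, 2.5, 1.0])
m, c = linear_fit(x, y_pred)
print(m < 0, linear_residuals(x, y_pred, m, c).abs().mean())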