Switched loss in expt2 to smooth_l1

Switched from mse_loss to smooth_l1_loss to avoid the exploding gradients and NaNs that occurred when using mse_loss.
2026-02-24 01:48:03 +00:00 · 2024-06-07 10:58:25 +01:00
parent 1c21ee25d7
commit c7133a8bb1
3 changed files with 13 additions and 13 deletions
--- a/symbolic_nn_tests/experiment2/init.py
+++ b/symbolic_nn_tests/experiment2/init.py
@@ -40,21 +40,21 @@ def test(train_loss, val_loss, test_loss, version, tensorboard=True, wandb=True)


 def run(tensorboard: bool = True, wandb: bool = True):
-    from .model import unpacking_mse_loss
+    from .model import unpacking_smooth_l1_loss
    from . import semantic_loss

    test(
-        train_loss=unpacking_mse_loss,
-        val_loss=unpacking_mse_loss,
-        test_loss=unpacking_mse_loss,
-        version="mse_loss",
+        train_loss=unpacking_smooth_l1_loss,
+        val_loss=unpacking_smooth_l1_loss,
+        test_loss=unpacking_smooth_l1_loss,
+        version="smooth_l1_loss",
        tensorboard=tensorboard,
        wandb=wandb,
    )
    test(
        train_loss=semantic_loss.positive_slope_linear_loss,
-        val_loss=unpacking_mse_loss,
-        test_loss=unpacking_mse_loss,
+        val_loss=unpacking_smooth_l1_loss,
+        test_loss=unpacking_smooth_l1_loss,
        version="positive_slope_linear_loss",
        tensorboard=tensorboard,
        wandb=wandb,
--- a/symbolic_nn_tests/experiment2/model.py
+++ b/symbolic_nn_tests/experiment2/model.py
@@ -61,15 +61,15 @@ def get_singleton_dataset():
    )


-def unpacking_mse_loss(out, y):
+def unpacking_smooth_l1_loss(out, y):
    _, y_pred = out
-    return nn.functional.mse_loss(y_pred, y)
+    return nn.functional.smooth_l1_loss(y_pred, y)


 def main(
-    train_loss=unpacking_mse_loss,
-    val_loss=unpacking_mse_loss,
-    test_loss=unpacking_mse_loss,
+    train_loss=unpacking_smooth_l1_loss,
+    val_loss=unpacking_smooth_l1_loss,
+    test_loss=unpacking_smooth_l1_loss,
    logger=None,
    **kwargs,
 ):
--- a/symbolic_nn_tests/experiment2/semantic_loss.py
+++ b/symbolic_nn_tests/experiment2/semantic_loss.py
@@ -66,4 +66,4 @@ def positive_slope_linear_loss(out, y):
    slope_penalty = (torch.nn.functional.softmax(-m * 500.0) + 1).mean()

    # Finally, let's get a smooth L1 loss and scale it based on these penalty functions
-    return nn.functional.mse_loss(y_pred, y) * residual_penalty * slope_penalty
+    return nn.functional.smooth_l1_loss(y_pred, y) * residual_penalty * slope_penalty