{ "cells": [ { "cell_type": "raw", "metadata": {}, "source": [ "This notebook contains functions for processing sample_X and sample_y into a format better suited for the neural network." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/cianh/Programming/Git_Projects/Aconity_ML_Expt1/.venv/lib/python3.11/site-packages/distributed/node.py:182: UserWarning: Port 8787 is already in use.\n", "Perhaps you already have a cluster running?\n", "Hosting the HTTP server on port 42341 instead\n", " warnings.warn(\n" ] }, { "data": { "text/html": [ "
\n", "
\n", "
\n", "

Client

\n", "

Client-c75affb4-3164-11ee-9498-94e70ba5618f

\n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "
Connection method: Cluster objectCluster type: distributed.LocalCluster
\n", " Dashboard: http://127.0.0.1:42341/status\n", "
\n", "\n", " \n", "\n", " \n", "
\n", "

Cluster Info

\n", "
\n", "
\n", "
\n", "
\n", "

LocalCluster

\n", "

c08f98d9

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", "
\n", " Dashboard: http://127.0.0.1:42341/status\n", " \n", " Workers: 12\n", "
\n", " Total threads: 12\n", " \n", " Total memory: 15.27 GiB\n", "
Status: runningUsing processes: True
\n", "\n", "
\n", " \n", "

Scheduler Info

\n", "
\n", "\n", "
\n", "
\n", "
\n", "
\n", "

Scheduler

\n", "

Scheduler-3b57d4c0-ee42-41d2-8f9e-8b3c447dbbba

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", " Comm: tcp://127.0.0.1:41583\n", " \n", " Workers: 12\n", "
\n", " Dashboard: http://127.0.0.1:42341/status\n", " \n", " Total threads: 12\n", "
\n", " Started: Just now\n", " \n", " Total memory: 15.27 GiB\n", "
\n", "
\n", "
\n", "\n", "
\n", " \n", "

Workers

\n", "
\n", "\n", " \n", "
\n", "
\n", "
\n", "
\n", " \n", "

Worker: 0

\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "\n", "
\n", " Comm: tcp://127.0.0.1:36129\n", " \n", " Total threads: 1\n", "
\n", " Dashboard: http://127.0.0.1:45385/status\n", " \n", " Memory: 1.27 GiB\n", "
\n", " Nanny: tcp://127.0.0.1:34663\n", "
\n", " Local directory: /tmp/dask-scratch-space/worker-nvlgjdsy\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", "
\n", " \n", "

Worker: 1

\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "\n", "
\n", " Comm: tcp://127.0.0.1:44549\n", " \n", " Total threads: 1\n", "
\n", " Dashboard: http://127.0.0.1:35385/status\n", " \n", " Memory: 1.27 GiB\n", "
\n", " Nanny: tcp://127.0.0.1:43857\n", "
\n", " Local directory: /tmp/dask-scratch-space/worker-vq7_1l_n\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", "
\n", " \n", "

Worker: 2

\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "\n", "
\n", " Comm: tcp://127.0.0.1:39469\n", " \n", " Total threads: 1\n", "
\n", " Dashboard: http://127.0.0.1:34561/status\n", " \n", " Memory: 1.27 GiB\n", "
\n", " Nanny: tcp://127.0.0.1:34253\n", "
\n", " Local directory: /tmp/dask-scratch-space/worker-6d09l2i_\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", "
\n", " \n", "

Worker: 3

\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "\n", "
\n", " Comm: tcp://127.0.0.1:46873\n", " \n", " Total threads: 1\n", "
\n", " Dashboard: http://127.0.0.1:43591/status\n", " \n", " Memory: 1.27 GiB\n", "
\n", " Nanny: tcp://127.0.0.1:43029\n", "
\n", " Local directory: /tmp/dask-scratch-space/worker-ufai363_\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", "
\n", " \n", "

Worker: 4

\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "\n", "
\n", " Comm: tcp://127.0.0.1:44661\n", " \n", " Total threads: 1\n", "
\n", " Dashboard: http://127.0.0.1:38421/status\n", " \n", " Memory: 1.27 GiB\n", "
\n", " Nanny: tcp://127.0.0.1:33371\n", "
\n", " Local directory: /tmp/dask-scratch-space/worker-pg33jmms\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", "
\n", " \n", "

Worker: 5

\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "\n", "
\n", " Comm: tcp://127.0.0.1:39485\n", " \n", " Total threads: 1\n", "
\n", " Dashboard: http://127.0.0.1:38259/status\n", " \n", " Memory: 1.27 GiB\n", "
\n", " Nanny: tcp://127.0.0.1:43601\n", "
\n", " Local directory: /tmp/dask-scratch-space/worker-zjadt63s\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", "
\n", " \n", "

Worker: 6

\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "\n", "
\n", " Comm: tcp://127.0.0.1:36823\n", " \n", " Total threads: 1\n", "
\n", " Dashboard: http://127.0.0.1:33121/status\n", " \n", " Memory: 1.27 GiB\n", "
\n", " Nanny: tcp://127.0.0.1:44609\n", "
\n", " Local directory: /tmp/dask-scratch-space/worker-2rplwyh_\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", "
\n", " \n", "

Worker: 7

\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "\n", "
\n", " Comm: tcp://127.0.0.1:44023\n", " \n", " Total threads: 1\n", "
\n", " Dashboard: http://127.0.0.1:38183/status\n", " \n", " Memory: 1.27 GiB\n", "
\n", " Nanny: tcp://127.0.0.1:41607\n", "
\n", " Local directory: /tmp/dask-scratch-space/worker-pqn109wo\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", "
\n", " \n", "

Worker: 8

\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "\n", "
\n", " Comm: tcp://127.0.0.1:37553\n", " \n", " Total threads: 1\n", "
\n", " Dashboard: http://127.0.0.1:45271/status\n", " \n", " Memory: 1.27 GiB\n", "
\n", " Nanny: tcp://127.0.0.1:35725\n", "
\n", " Local directory: /tmp/dask-scratch-space/worker-ns7d8c9m\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", "
\n", " \n", "

Worker: 9

\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "\n", "
\n", " Comm: tcp://127.0.0.1:36985\n", " \n", " Total threads: 1\n", "
\n", " Dashboard: http://127.0.0.1:45069/status\n", " \n", " Memory: 1.27 GiB\n", "
\n", " Nanny: tcp://127.0.0.1:36523\n", "
\n", " Local directory: /tmp/dask-scratch-space/worker-6nhixvcl\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", "
\n", " \n", "

Worker: 10

\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "\n", "
\n", " Comm: tcp://127.0.0.1:46097\n", " \n", " Total threads: 1\n", "
\n", " Dashboard: http://127.0.0.1:44731/status\n", " \n", " Memory: 1.27 GiB\n", "
\n", " Nanny: tcp://127.0.0.1:37565\n", "
\n", " Local directory: /tmp/dask-scratch-space/worker-n9rygn28\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", "
\n", " \n", "

Worker: 11

\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "\n", "
\n", " Comm: tcp://127.0.0.1:42077\n", " \n", " Total threads: 1\n", "
\n", " Dashboard: http://127.0.0.1:37763/status\n", " \n", " Memory: 1.27 GiB\n", "
\n", " Nanny: tcp://127.0.0.1:34247\n", "
\n", " Local directory: /tmp/dask-scratch-space/worker-0arh8rvc\n", "
\n", "
\n", "
\n", "
\n", " \n", "\n", "
\n", "
\n", "\n", "
\n", "
\n", "
\n", "
\n", " \n", "\n", "
\n", "
" ], "text/plain": [ "" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "from pathlib import Path\n",
 "from dask.distributed import Client, LocalCluster, as_completed\n",
 "import dask.dataframe as dd\n",
 "import pandas as pd\n",
 "import pickle\n",
 "from tqdm.auto import tqdm\n",
 "\n",
 "# Prepare a dask cluster and client\n",
 "cluster = LocalCluster(n_workers=12, threads_per_worker=1)\n",
 "client = Client(cluster)\n",
 "client"
 ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [
 "# Load X and y for training\n",
 "samples = list(range(1, 82))\n",
 "\n",
 "X = {i: dd.read_parquet(f\"sample_X/{i}\") for i in samples}\n",
 "\n",
 "with open(\"sample_y.pkl\", \"rb\") as f:\n",
 "    y = pickle.load(f)"
 ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [
 "def separate_params(d):\n",
 "    \"\"\"Yield ``(sample, params)`` pairs holding only the parameter columns.\n",
 "\n",
 "    Args:\n",
 "        d: Mapping of sample key to a table that supports column-list\n",
 "            indexing and ``rename(columns=...)`` (presumably a pandas\n",
 "            DataFrame -- confirm against how ``sample_y.pkl`` was built).\n",
 "\n",
 "    Yields:\n",
 "        ``(sample, params)`` where ``params`` keeps the four selected\n",
 "        columns, renamed to ``laser_power``, ``scan_speed``, ``spot_size``\n",
 "        and ``hatch_spacing``.\n",
 "    \"\"\"\n",
 "    # A single mapping drives both the selection and the rename, so the two\n",
 "    # cannot drift apart; its insertion order fixes the column order.\n",
 "    column_names = {\n",
 "        \"Laser power, P (W)\": \"laser_power\",\n",
 "        \"Scan speed, V (mm/sec)\": \"scan_speed\",\n",
 "        \"Spot size, F (µm)\": \"spot_size\",\n",
 "        \"Hatch spacing, H (µm)\": \"hatch_spacing\",\n",
 "    }\n",
 "    for sample, df in d.items():\n",
 "        yield sample, df[list(column_names)].rename(columns=column_names)\n",
 "\n",
 "\n",
 "params = dict(separate_params(y))"
 ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [
 "def separate_composition(d):\n",
 "    \"\"\"Yield ``(sample, composition)`` pairs holding only the Ni/Ti columns.\n",
 "\n",
 "    Args:\n",
 "        d: Mapping of sample key to a table that supports column-list\n",
 "            indexing and ``rename(columns=...)`` (presumably a pandas\n",
 "            DataFrame -- confirm against how ``sample_y.pkl`` was built).\n",
 "\n",
 "    Yields:\n",
 "        ``(sample, composition)`` where ``composition`` keeps the\n",
 "        ``Ni (Norm)`` and ``Ti (Norm)`` columns, renamed to ``Ni`` and ``Ti``.\n",
 "    \"\"\"\n",
 "    # Same pattern as separate_params: one mapping for select and rename.\n",
 "    column_names = {\"Ni (Norm)\": \"Ni\", \"Ti (Norm)\": \"Ti\"}\n",
 "    for sample, df in d.items():\n",
 "        yield sample, df[list(column_names)].rename(columns=column_names)\n",
 "\n",
 "\n",
 "composition = dict(separate_composition(y))"
 ] }, { "cell_type": "code", "execution_count": 135, "metadata": {}, "outputs": [], "source": [
 "# Bundle every sample's parameters with its dask DataFrame, keyed by the\n",
 "# integer name of its entry under sample_X/.\n",
 "with open(\"sample_X.pkl\", \"wb\") as f:\n",
 "    pickle.dump(\n",
 "        {\n",
 "            int(sample.stem): (\n",
 "                params[int(sample.stem)],\n",
 "                dd.read_parquet(sample),\n",
 "            )\n",
 "            for sample in Path(\"sample_X\").iterdir()\n",
 "        },\n",
 "        f,\n",
 "    )\n",
 "\n",
 "# NOTE(review): this replaces the sample_y.pkl that the loading cell reads.\n",
 "# After one run the file holds only the renamed Ni/Ti columns, so a fresh\n",
 "# run would no longer find the columns separate_params selects -- keep a\n",
 "# copy of the original file (or write to a new name) before re-running.\n",
 "with open(\"sample_y.pkl\", \"wb\") as f:\n",
 "    pickle.dump(composition, f)"
 ] }, { "cell_type": "code", "execution_count": 100, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "2dd118d845c84669ac69a8cbac359301", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/7 [00:00