Jupyter notebook to generate LSTM model artifacts to be used in rapp (change 65/15165/1)
author sunil.n <sunil.n@samsung.com>
Tue, 21 Oct 2025 06:32:44 +0000 (12:02 +0530)
committer sunil.n <sunil.n@samsung.com>
Tue, 21 Oct 2025 06:32:44 +0000 (12:02 +0530)
Change-Id: Iae9c97ce54b5ce97d7bbf7c76804b187529fdd46
Signed-off-by: sunil.n <sunil.n@samsung.com>
sample-rapp-generator/rapp-slice-prb-prediction/RAN_Slice_PRB_Prediction_Rapp_Model_Generator.ipynb [new file with mode: 0644]

diff --git a/sample-rapp-generator/rapp-slice-prb-prediction/RAN_Slice_PRB_Prediction_Rapp_Model_Generator.ipynb b/sample-rapp-generator/rapp-slice-prb-prediction/RAN_Slice_PRB_Prediction_Rapp_Model_Generator.ipynb
new file mode 100644 (file)
index 0000000..eebfea2
--- /dev/null
@@ -0,0 +1,638 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# RAN Slice PRB Prediction rApp Model Generator\n",
+    "\n",
+    "This notebook generates an LSTM model for predicting Physical Resource Block (PRB) usage in RAN slices.\n",
+    "\n",
+    "## Overview\n",
+    "- Fetches NSSAI performance data from InfluxDB\n",
+    "- Preprocesses data with encoders and scalers\n",
+    "- Creates time series sequences for LSTM training\n",
+    "- Trains and evaluates LSTM model\n",
+    "- Saves model artifacts for deployment"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Imports and Configuration\n",
+    "import os\n",
+    "import sys\n",
+    "import argparse\n",
+    "import json\n",
+    "from datetime import datetime, timezone\n",
+    "from typing import Tuple, List, Dict, Optional\n",
+    "\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "\n",
+    "from influxdb_client import InfluxDBClient\n",
+    "from influxdb_client.client.flux_table import FluxStructureEncoder\n",
+    "from influxdb_client.client.write_api import SYNCHRONOUS\n",
+    "\n",
+    "from sklearn.preprocessing import MinMaxScaler, OneHotEncoder\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from joblib import dump, load\n",
+    "\n",
+    "import tensorflow as tf\n",
+    "from tensorflow.keras.models import Sequential\n",
+    "from tensorflow.keras.layers import LSTM, Dense, Dropout, Attention\n",
+    "from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint\n",
+    "\n",
+    "from sklearn.metrics import mean_absolute_error, r2_score, root_mean_squared_error\n",
+    "import matplotlib.pyplot as plt\n",
+    "from typing import List, Optional"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# InfluxDB Configuration\n",
+    "influx_url = \"http://localhost:8086\"\n",
+    "influx_org = \"srib\"\n",
+    "influx_token = \"mRVPMAc6Wa9npA-mvJY2t3ttYS4Sr5JXRflkk81pt_edEz05aGujt2cnNBm3kITix6qRwLCr7HejEV6ufskOcA==\"\n",
+    "bucket =  \"nssi_pm_bucket\"\n",
+    "measurement = \"nssi_pm_bucket\"\n",
+    "start = \"-0\"\n",
+    "\n",
+    "# Field and Tag Definitions\n",
+    "field_prb_dl = \"RRU.PrbDl.SNSSAI\"\n",
+    "field_data_dl = \"DRB.PdcpSduVolumeDL.SNSSAI\"\n",
+    "field_rrc_succ = \"RRC.ConnEstabSucc.Cause\"\n",
+    "tag_slice_type = \"sliceType\"\n",
+    "tag_nssi_id = \"measObjLdn\"\n",
+    "\n",
+    "# Model Parameters\n",
+    "window = 672\n",
+    "horizon = 1"
+   ]
+  },
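+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Optional sanity check (not part of the original pipeline): verify that InfluxDB is reachable with the configured URL and token before running the data pull. This assumes the `ping()` method of `InfluxDBClient`, which returns `True` when the server responds."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Optional: probe InfluxDB connectivity before the data pull\n",
+    "probe = InfluxDBClient(url=influx_url, token=influx_token, org=influx_org)\n",
+    "try:\n",
+    "    print(f\"InfluxDB reachable: {probe.ping()}\")\n",
+    "finally:\n",
+    "    probe.close()"
+   ]
+  },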
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Data Fetching Functions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def build_flux_query() -> str:\n",
+    "    \"\"\"\n",
+    "    Build a Flux query that:\n",
+    "    - filters measurement\n",
+    "    - keeps relevant fields\n",
+    "    - pivots to a wide table: columns for prb_dl, data_dl, rrc_succ\n",
+    "    - keeps slice_type and nssi_id as columns\n",
+    "    \"\"\"\n",
+    "\n",
+    "    field_names=[field_prb_dl, field_data_dl, field_rrc_succ]\n",
+    "    fields_filter = \" or \".join([f'r[\"_field\"] == \"{f}\"' for f in field_names])\n",
+    "    q = f'''\n",
+    "from(bucket: \"{bucket}\")\n",
+    "  |> range(start: {start})\n",
+    "  |> filter(fn: (r) => r[\"_measurement\"] == \"{measurement}\")\n",
+    "  |> filter(fn: (r) => {fields_filter})\n",
+    "  |> pivot(rowKey: [\"_time\"], columnKey: [\"_field\"], valueColumn: \"_value\")\n",
+    "  |> keep(columns: [\"_time\", \"{tag_slice_type}\", \"{tag_nssi_id}\", \"{'\",\"'.join(field_names)}\"])\n",
+    "  |> sort(columns: [\"_time\"])\n",
+    "'''\n",
+    "\n",
+    "    return q"
+   ]
+  },
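+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "A quick way to debug the pivot/keep clauses is to print the generated Flux before sending it (an optional helper, not required by the pipeline):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Preview the Flux query that fetch_from_influx() will execute\n",
+    "print(build_flux_query())"
+   ]
+  },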
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def fetch_from_influx() -> pd.DataFrame:\n",
+    "    client = InfluxDBClient(url=influx_url, token=influx_token, org=influx_org, timeout=60_000)\n",
+    "    query_api = client.query_api()\n",
+    "    flux = build_flux_query()\n",
+    "    tables = query_api.query_data_frame(query=flux)\n",
+    "    client.close()\n",
+    "\n",
+    "    if isinstance(tables, list) and len(tables) > 0:\n",
+    "        df = pd.concat(tables, ignore_index=True)\n",
+    "    else:\n",
+    "        df = tables\n",
+    "\n",
+    "    if df is None or df.empty:\n",
+    "        raise RuntimeError(\"No data returned from InfluxDB. Check your query parameters.\")\n",
+    "\n",
+    "    # Standardize column names\n",
+    "    df = df.rename(columns={\n",
+    "        \"_time\": \"time\",\n",
+    "        tag_slice_type: \"slice_type\",\n",
+    "        tag_nssi_id: \"nssi_id\", # Renaming measObjLdn to nssi_id\n",
+    "        field_prb_dl: \"prb_dl\",\n",
+    "        field_data_dl: \"data_dl\",\n",
+    "        field_rrc_succ: \"rrc_succ\"\n",
+    "    })\n",
+    "\n",
+    "    # Ensure types\n",
+    "    df[\"time\"] = pd.to_datetime(df[\"time\"], utc=True)\n",
+    "    df = df.sort_values([\"slice_type\", \"nssi_id\", \"time\"]).reset_index(drop=True)\n",
+    "\n",
+    "    # Drop rows with any NA in core columns\n",
+    "    df = df.dropna(subset=[\"slice_type\", \"nssi_id\", \"time\", \"prb_dl\", \"data_dl\", \"rrc_succ\"])\n",
+    "\n",
+    "    return df[[\"time\", \"slice_type\", \"nssi_id\", \"prb_dl\", \"data_dl\", \"rrc_succ\"]]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Data Preparation Functions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def prepare_encoders_and_scalers(df: pd.DataFrame) -> Tuple[OneHotEncoder, OneHotEncoder, Dict[str, MinMaxScaler]]:\n",
+    "    slice_enc = OneHotEncoder(handle_unknown=\"ignore\", sparse_output=False)\n",
+    "    slice_enc.fit(df[[\"slice_type\"]])\n",
+    "\n",
+    "    nssi_enc = OneHotEncoder(handle_unknown=\"ignore\", sparse_output=False)\n",
+    "    nssi_enc.fit(df[[\"nssi_id\"]])\n",
+    "\n",
+    "    scalers = {\n",
+    "        \"prb\": MinMaxScaler(),\n",
+    "        \"data\": MinMaxScaler(),\n",
+    "        \"rrc\": MinMaxScaler(),\n",
+    "        \"y\": MinMaxScaler()\n",
+    "    }\n",
+    "    scalers[\"prb\"].fit(df[[\"prb_dl\"]])\n",
+    "    scalers[\"data\"].fit(df[[\"data_dl\"]])\n",
+    "    scalers[\"rrc\"].fit(df[[\"rrc_succ\"]])\n",
+    "    scalers[\"y\"].fit(df[[\"prb_dl\"]])  # target same as prb_dl\n",
+    "\n",
+    "    return slice_enc, nssi_enc, scalers"
+   ]
+  },
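+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Because the encoders use `handle_unknown=\"ignore\"`, a category not seen at fit time encodes as an all-zero row instead of raising. The self-contained toy below (the slice names are illustrative only) shows the behavior:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Toy illustration of handle_unknown=\"ignore\" (category names are made up)\n",
+    "demo_enc = OneHotEncoder(handle_unknown=\"ignore\", sparse_output=False)\n",
+    "demo_enc.fit(pd.DataFrame({\"slice_type\": [\"eMBB\", \"URLLC\"]}))\n",
+    "# \"mMTC\" was never seen at fit time, so it encodes as all zeros\n",
+    "print(demo_enc.transform(pd.DataFrame({\"slice_type\": [\"eMBB\", \"mMTC\"]})))"
+   ]
+  },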
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def make_sequences_per_slice(\n",
+    "    df: pd.DataFrame,\n",
+    "    window: int,\n",
+    "    horizon: int,\n",
+    "    target_column: str,\n",
+    "    slice_enc: OneHotEncoder,\n",
+    "    nssi_enc: OneHotEncoder,\n",
+    "    scalers: Dict[str, MinMaxScaler]\n",
+    ") -> Tuple[np.ndarray, np.ndarray]:\n",
+    "    \"\"\"\n",
+    "    For each (slice_type, nssi_id) combination, use rolling windows to create sequences.\n",
+    "    Features per timestep: [one-hot(slice_type), one-hot(nssi_id), scaled(prb_dl, data_dl, rrc_succ)]\n",
+    "    Target: next-step prb_dl (scaled with y_scaler)\n",
+    "    \"\"\"\n",
+    "    X_list, y_list = [], []\n",
+    "\n",
+    "    for (slice_type, nssi_id), g in df.groupby([\"slice_type\", \"nssi_id\"]):\n",
+    "        g = g.sort_values(\"time\").reset_index(drop=True)\n",
+    "\n",
+    "        if len(g) < window + horizon:\n",
+    "            continue # Not enough data for this specific group to form a sequence\n",
+    "\n",
+    "        # Prepare feature matrix for this group\n",
+    "        # One-hot for slice_type (same for all rows in g)\n",
+    "        slice_oh = slice_enc.transform(np.array([[slice_type]]))  # shape (1, k_slice)\n",
+    "        slice_oh_row = np.repeat(slice_oh, len(g), axis=0)        # shape (len(g), k_slice)\n",
+    "\n",
+    "        # One-hot for nssi_id (same for all rows in g)\n",
+    "        nssi_oh = nssi_enc.transform(np.array([[nssi_id]]))  # shape (1, k_nssi)\n",
+    "        nssi_oh_row = np.repeat(nssi_oh, len(g), axis=0)      # shape (len(g), k_nssi)\n",
+    "\n",
+    "        # Scale numeric features\n",
+    "        prb = scalers[\"prb\"].transform(g[[\"prb_dl\"]])\n",
+    "        data = scalers[\"data\"].transform(g[[\"data_dl\"]])\n",
+    "        rrc = scalers[\"rrc\"].transform(g[[\"rrc_succ\"]])\n",
+    "\n",
+    "        feat = np.concatenate([slice_oh_row, nssi_oh_row, prb, data, rrc], axis=1)  # shape (len(g), k_slice + k_nssi + 3)\n",
+    "\n",
+    "        # Targets (scaled)\n",
+    "        y_scaled = scalers[\"y\"].transform(g[[target_column]])  # prb_dl\n",
+    "\n",
+    "        # Build window->horizon sequences\n",
+    "        total_len = len(g)\n",
+    "        for i in range(total_len - window - horizon + 1):\n",
+    "            seq_x = feat[i:i+window, :]  # (window, features)\n",
+    "            target = y_scaled[i+window + horizon - 1, 0]  # scalar\n",
+    "            X_list.append(seq_x)\n",
+    "            y_list.append(target)\n",
+    "\n",
+    "    if not X_list:\n",
+    "        raise RuntimeError(\"Not enough data to build sequences for any (slice_type, nssi_id) group. Try reducing --window or --horizon, or check data density per group.\")\n",
+    "\n",
+    "    X = np.stack(X_list).astype(np.float32)\n",
+    "    y = np.array(y_list).astype(np.float32).reshape(-1, 1)\n",
+    "    return X, y"
+   ]
+  },
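+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The indexing above is easy to get off by one, so here is the same `window`/`horizon` arithmetic on a toy series: the inputs ending at index `i+window-1` predict the value at index `i+window+horizon-1`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Toy walkthrough of the rolling-window indexing used in make_sequences_per_slice\n",
+    "toy = np.arange(8)\n",
+    "w, h = 3, 1  # small window/horizon for readability\n",
+    "for i in range(len(toy) - w - h + 1):\n",
+    "    print(f\"inputs {toy[i:i+w]} -> target {toy[i + w + h - 1]}\")\n",
+    "# e.g. inputs [0 1 2] -> target 3, matching feat[i:i+window] and y[i+window+horizon-1]"
+   ]
+  },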
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Model Building Functions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def build_model(input_shape: Tuple[int, int]) -> tf.keras.Model:\n",
+    "    model = Sequential([\n",
+    "        LSTM(64, return_sequences=True, input_shape=input_shape),\n",
+    "        Dropout(0.2),\n",
+    "        LSTM(32),\n",
+    "        Dropout(0.2),\n",
+    "        Dense(16, activation=\"relu\"),\n",
+    "        Dense(1)  # regression\n",
+    "    ])\n",
+    "    model.compile(loss=tf.keras.losses.Huber(delta=100), optimizer=\"adam\")\n",
+    "    # model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),\n",
+    "    #               loss=\"mse\",\n",
+    "    #               metrics=[tf.keras.metrics.MeanAbsoluteError()])\n",
+    "    return model"
+   ]
+  },
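+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "To inspect the architecture before the real feature dimension is known, the model can be instantiated with a placeholder input shape (the `8` below is illustrative; the actual feature count comes from `X.shape[2]` in Step 3):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Preview the layer stack with a placeholder feature dimension\n",
+    "build_model(input_shape=(window, 8)).summary()"
+   ]
+  },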
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Training and Utility Functions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def train_val_split_time(\n",
+    "    df: pd.DataFrame,\n",
+    "    val_split: float\n",
+    ") -> pd.Timestamp:\n",
+    "    \"\"\"\n",
+    "    Compute a time threshold so that approximately val_split of the data by time goes to validation.\n",
+    "    \"\"\"\n",
+    "    times = df[\"time\"].sort_values().unique()\n",
+    "    if len(times) < 10:\n",
+    "        # fallback\n",
+    "        cutoff_idx = int(len(times) * (1 - val_split))\n",
+    "        return pd.to_datetime(times[max(0, cutoff_idx-1)], utc=True)\n",
+    "    cutoff_idx = int(len(times) * (1 - val_split))\n",
+    "    cutoff_time = pd.to_datetime(times[cutoff_idx], utc=True)\n",
+    "    return cutoff_time"
+   ]
+  },
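+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "A quick check of the cutoff arithmetic on synthetic timestamps (illustrative only): with 10 hourly timestamps and `val_split=0.2`, the cutoff lands on the 9th timestamp, so the last two hours fall into validation."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Toy check of the time-based cutoff on synthetic data\n",
+    "toy_times = pd.DataFrame({\"time\": pd.date_range(\"2025-01-01\", periods=10, freq=\"h\", tz=\"UTC\")})\n",
+    "print(train_val_split_time(toy_times, val_split=0.2))"
+   ]
+  },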
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def fit_model(\n",
+    "    X_train: np.ndarray, y_train: np.ndarray,\n",
+    "    X_val: np.ndarray, y_val: np.ndarray,\n",
+    "    model_dir: str,\n",
+    "    epochs: int,\n",
+    "    batch_size: int\n",
+    ") -> tf.keras.Model:\n",
+    "    os.makedirs(model_dir, exist_ok=True)\n",
+    "    ckpt_path = os.path.join(model_dir, \"best_prb_lstm.keras\")\n",
+    "\n",
+    "    callbacks = [\n",
+    "        EarlyStopping(monitor=\"val_loss\", patience=5, restore_best_weights=True),\n",
+    "        ReduceLROnPlateau(monitor=\"val_loss\", factor=0.5, patience=3),\n",
+    "        ModelCheckpoint(ckpt_path, monitor=\"val_loss\", save_best_only=True)\n",
+    "    ]\n",
+    "\n",
+    "    model = build_model(input_shape=(X_train.shape[1], X_train.shape[2]))\n",
+    "    model.summary()\n",
+    "    model.fit(\n",
+    "        X_train, y_train,\n",
+    "        validation_data=(X_val, y_val),\n",
+    "        epochs=epochs,\n",
+    "        batch_size=batch_size,\n",
+    "        callbacks=callbacks,\n",
+    "        verbose=1\n",
+    "    )\n",
+    "    # Save final as well\n",
+    "    final_path = os.path.join(model_dir, \"final_prb_lstm.keras\")\n",
+    "    model.save(final_path)\n",
+    "    return model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def save_artifacts(model_dir: str, slice_enc: OneHotEncoder, nssi_enc: OneHotEncoder, scalers: Dict[str, MinMaxScaler],\n",
+    "                   meta: Dict):\n",
+    "    os.makedirs(model_dir, exist_ok=True)\n",
+    "    dump(slice_enc, os.path.join(model_dir, \"slice_onehot.joblib\"))\n",
+    "    dump(nssi_enc, os.path.join(model_dir, \"nssi_onehot.joblib\"))\n",
+    "    for k, sc in scalers.items():\n",
+    "        dump(sc, os.path.join(model_dir, f\"scaler_{k}.joblib\"))\n",
+    "    with open(os.path.join(model_dir, \"meta.json\"), \"w\") as f:\n",
+    "        json.dump(meta, f, indent=2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Execution Pipeline"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step 1: Pull Data from InfluxDB"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 1) Pull data\n",
+    "df = fetch_from_influx()\n",
+    "print(f\"Data shape: {df.shape}\")\n",
+    "print(f\"Date range: {df['time'].min()} to {df['time'].max()}\")\n",
+    "print(f\"Unique slice types: {df['slice_type'].nunique()}\")\n",
+    "print(f\"Unique NSSI IDs: {df['nssi_id'].nunique()}\")\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step 2: Prepare Encoders and Scalers"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 2) Prepare encoders/scalers\n",
+    "slice_enc, nssi_enc, scalers = prepare_encoders_and_scalers(df)\n",
+    "print(f\"Slice encoder categories: {slice_enc.categories_[0]}\")\n",
+    "print(f\"NSSI encoder categories: {len(nssi_enc.categories_[0])} unique IDs\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step 3: Build Sequences"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 3) Build sequences\n",
+    "X, y = make_sequences_per_slice(\n",
+    "        df, window, horizon,\n",
+    "        target_column=\"prb_dl\",\n",
+    "        slice_enc=slice_enc, nssi_enc=nssi_enc, scalers=scalers\n",
+    "    )\n",
+    "print(f\"Sequences shape: X={X.shape}, y={y.shape}\")\n",
+    "print(f\"Feature dimension: {X.shape[2]}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step 4: Train/Validation Split by Time"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 4) Train/val split by time: compute cutoff on original df time\n",
+    "cutoff_time = train_val_split_time(df, val_split=0.2)\n",
+    "print(f\"Validation cutoff time: {cutoff_time}\")\n",
+    "\n",
+    "# Map each sequence to an \"end time\" by using the window end indices\n",
+    "# This part needs to be consistent with how make_sequences_per_slice iterates\n",
+    "end_times = []\n",
+    "\n",
+    "for (slice_type, nssi_id), g in df.groupby([\"slice_type\", \"nssi_id\"]):\n",
+    "    g = g.sort_values(\"time\").reset_index(drop=True)\n",
+    "    total_len = len(g)\n",
+    "    if total_len < window + horizon:\n",
+    "        continue\n",
+    "    for i in range(total_len - window - horizon + 1):\n",
+    "        end_time = g.loc[i + window - 1, \"time\"]\n",
+    "        end_times.append(end_time.value)  # int ns since epoch\n",
+    "\n",
+    "end_times = np.array(end_times)\n",
+    "if len(end_times) != len(X):\n",
+    "    # Fallback if mapping is problematic (e.g. if X is empty due to filtering in make_sequences)\n",
+    "    print(\"[WARN] Mismatch in sequence count for time-based split. Using simple random split.\")\n",
+    "    split_idx = int(0.8 * len(X))\n",
+    "    mask_train = np.zeros(len(X), dtype=bool)\n",
+    "    mask_train[:split_idx] = True\n",
+    "    mask_val = ~mask_train\n",
+    "else:\n",
+    "    mask_train = end_times < pd.Timestamp(cutoff_time).value\n",
+    "    mask_val = ~mask_train\n",
+    "    if mask_train.sum() == 0 or mask_val.sum() == 0:\n",
+    "        print(\"[WARN] Time-based split resulted in empty train/val set. Using simple random split.\")\n",
+    "        split_idx = int(0.8 * len(X))\n",
+    "        mask_train = np.zeros(len(X), dtype=bool)\n",
+    "        mask_train[:split_idx] = True\n",
+    "        mask_val = ~mask_train\n",
+    "\n",
+    "X_train, y_train = X[mask_train], y[mask_train]\n",
+    "X_val, y_val = X[mask_val], y[mask_val]\n",
+    "\n",
+    "print(f\"Training set: {X_train.shape}\")\n",
+    "print(f\"Validation set: {X_val.shape}\")\n",
+    "print(f\"Train/Val ratio: {len(X_train)/(len(X_train)+len(X_val)):.2f}/{len(X_val)/(len(X_train)+len(X_val)):.2f}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step 5: Train Model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 5) Train\n",
+    "model = fit_model(\n",
+    "        X_train, y_train, X_val, y_val,\n",
+    "        model_dir=\"models\",\n",
+    "        epochs=20,\n",
+    "        batch_size=64\n",
+    "    )"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step 6: Save Artifacts"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 6) Save artifacts\n",
+    "meta = {\n",
+    "        \"window\": window,\n",
+    "        \"horizon\": horizon,\n",
+    "        \"features\": [\"onehot(slice_type)\", \"onehot(nssi_id)\", \"prb_dl\", \"data_dl\", \"rrc_succ\"],\n",
+    "        \"target\": \"prb_dl\",\n",
+    "        \"measurement\": measurement,\n",
+    "        \"bucket\": bucket,\n",
+    "        \"start\": start,\n",
+    "        \"val_cutoff_time\": pd.to_datetime(cutoff_time).isoformat()\n",
+    "    }\n",
+    "save_artifacts(\"models\", slice_enc, nssi_enc, scalers, meta)\n",
+    "print(\"Model artifacts saved to 'models' directory\")"
+   ]
+  },
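+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As a deployment-side sanity check, the artifacts can be reloaded the way a consuming service would (a sketch; the file names match `fit_model` and `save_artifacts` above):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Reload the saved artifacts to confirm they round-trip cleanly\n",
+    "reloaded_model = tf.keras.models.load_model(os.path.join(\"models\", \"final_prb_lstm.keras\"))\n",
+    "reloaded_slice_enc = load(os.path.join(\"models\", \"slice_onehot.joblib\"))\n",
+    "reloaded_scaler_y = load(os.path.join(\"models\", \"scaler_y.joblib\"))\n",
+    "with open(os.path.join(\"models\", \"meta.json\")) as f:\n",
+    "    print(json.load(f))"
+   ]
+  },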
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step 7: Evaluate Model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 7) Evaluate (denormalized MAE)\n",
+    "y_val_pred_scaled = model.predict(X_val).reshape(-1, 1)\n",
+    "y_val_pred = scalers[\"y\"].inverse_transform(y_val_pred_scaled)\n",
+    "y_val_true = scalers[\"y\"].inverse_transform(y_val)\n",
+    "\n",
+    "mae = np.mean(np.abs(y_val_true - y_val_pred))\n",
+    "print(f\"Validation MAE (in PRB units): {mae:.4f}\")\n",
+    "\n",
+    "rmse = root_mean_squared_error(y_val_true, y_val_pred)\n",
+    "print(\"RMSE:\", rmse)\n",
+    "\n",
+    "mae = mean_absolute_error(y_val_true, y_val_pred)\n",
+    "print(\"MAE:\", mae)\n",
+    "\n",
+    "r2 = r2_score(y_val_true, y_val_pred)\n",
+    "print(\"R2 Score:\", r2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Visualization\n",
+    "plt.figure(figsize=(12,6))\n",
+    "plt.plot(y_val_true[:200], label=\"Actual PRB Usage\", color=\"green\")\n",
+    "plt.plot(y_val_pred[:200], label=\"Forecasted PRB Usage\", color=\"red\")\n",
+    "plt.title(\"LSTM Prediction of PRB Usage\")\n",
+    "plt.xlabel(\"Time\")\n",
+    "plt.ylabel(\"PRB Usage (%)\")\n",
+    "plt.legend()\n",
+    "plt.grid(True)\n",
+    "\n",
+    "plt.savefig('lstm_forecast.jpeg', format='jpeg')\n",
+    "plt.close()\n",
+    "print(\"Forecast plot saved as 'lstm_forecast.jpeg'\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Summary\n",
+    "\n",
+    "The notebook has successfully:\n",
+    "1. ✅ Fetched NSSAI performance data from InfluxDB\n",
+    "2. ✅ Prepared encoders and scalers for data preprocessing\n",
+    "3. ✅ Created time series sequences for LSTM training\n",
+    "4. ✅ Split data into training and validation sets using time-based split\n",
+    "5. ✅ Trained LSTM model with early stopping and learning rate scheduling\n",
+    "6. ✅ Saved model artifacts (encoders, scalers, metadata) for deployment\n",
+    "7. ✅ Evaluated model performance and generated visualization\n",
+    "\n",
+    "The trained model and all preprocessing artifacts are now available in the `models` directory for use in production."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}