--- /dev/null
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# RAN Slice PRB Prediction rApp Model Generator\n",
+ "\n",
+ "This notebook generates an LSTM model for predicting Physical Resource Block (PRB) usage in RAN slices.\n",
+ "\n",
+ "## Overview\n",
+ "- Fetches NSSAI performance data from InfluxDB\n",
+ "- Preprocesses data with encoders and scalers\n",
+ "- Creates time series sequences for LSTM training\n",
+ "- Trains and evaluates LSTM model\n",
+ "- Saves model artifacts for deployment"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Imports and Configuration\n",
+ "import os\n",
+ "import sys\n",
+ "import argparse\n",
+ "import json\n",
+ "from datetime import datetime, timezone\n",
+ "from typing import Tuple, List, Dict, Optional\n",
+ "\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "\n",
+ "from influxdb_client import InfluxDBClient\n",
+ "from influxdb_client.client.flux_table import FluxStructureEncoder\n",
+ "from influxdb_client.client.write_api import SYNCHRONOUS\n",
+ "\n",
+ "from sklearn.preprocessing import MinMaxScaler, OneHotEncoder\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "from joblib import dump, load\n",
+ "\n",
+ "import tensorflow as tf\n",
+ "from tensorflow.keras.models import Sequential\n",
+ "from tensorflow.keras.layers import LSTM, Dense, Dropout, Attention\n",
+ "from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint\n",
+ "\n",
+ "from sklearn.metrics import mean_absolute_error, r2_score, root_mean_squared_error\n",
+ "import matplotlib.pyplot as plt\n",
+ "from typing import List, Optional"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# InfluxDB Configuration\n",
+ "influx_url = \"http://localhost:8086\"\n",
+ "influx_org = \"srib\"\n",
+ "influx_token = \"mRVPMAc6Wa9npA-mvJY2t3ttYS4Sr5JXRflkk81pt_edEz05aGujt2cnNBm3kITix6qRwLCr7HejEV6ufskOcA==\"\n",
+ "bucket = \"nssi_pm_bucket\"\n",
+ "measurement = \"nssi_pm_bucket\"\n",
+ "start = \"-0\"\n",
+ "\n",
+ "# Field and Tag Definitions\n",
+ "field_prb_dl = \"RRU.PrbDl.SNSSAI\"\n",
+ "field_data_dl = \"DRB.PdcpSduVolumeDL.SNSSAI\"\n",
+ "field_rrc_succ = \"RRC.ConnEstabSucc.Cause\"\n",
+ "tag_slice_type = \"sliceType\"\n",
+ "tag_nssi_id = \"measObjLdn\"\n",
+ "\n",
+ "# Model Parameters\n",
+ "window = 672\n",
+ "horizon = 1"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Data Fetching Functions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def build_flux_query() -> str:\n",
+ " \"\"\"\n",
+ " Build a Flux query that:\n",
+ " - filters measurement\n",
+ " - keeps relevant fields\n",
+ " - pivots to a wide table: columns for prb_dl, data_dl, rrc_succ\n",
+ " - keeps slice_type and nssi_id as columns\n",
+ " \"\"\"\n",
+ "\n",
+ " field_names=[field_prb_dl, field_data_dl, field_rrc_succ]\n",
+ " fields_filter = \" or \".join([f'r[\"_field\"] == \"{f}\"' for f in field_names])\n",
+ " q = f'''\n",
+ "from(bucket: \"{bucket}\")\n",
+ " |> range(start: {start})\n",
+ " |> filter(fn: (r) => r[\"_measurement\"] == \"{measurement}\")\n",
+ " |> filter(fn: (r) => {fields_filter})\n",
+ " |> pivot(rowKey: [\"_time\"], columnKey: [\"_field\"], valueColumn: \"_value\")\n",
+ " |> keep(columns: [\"_time\", \"{tag_slice_type}\", \"{tag_nssi_id}\", \"{'\",\"'.join(field_names)}\"])\n",
+ " |> sort(columns: [\"_time\"])\n",
+ "'''\n",
+ "\n",
+ " return q"
+ ]
+ },
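+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "A quick sanity check: the cell below prints the generated query so it can be reviewed (or pasted into the InfluxDB Data Explorer) before any data is pulled."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Preview the Flux query before executing it against InfluxDB.\n",
+ "print(build_flux_query())"
+ ]
+ },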
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def fetch_from_influx() -> pd.DataFrame:\n",
+ " client = InfluxDBClient(url=influx_url, token=influx_token, org=influx_org, timeout=60_000)\n",
+ " query_api = client.query_api()\n",
+ " flux = build_flux_query()\n",
+ " tables = query_api.query_data_frame(query=flux)\n",
+ " client.close()\n",
+ "\n",
+ " if isinstance(tables, list) and len(tables) > 0:\n",
+ " df = pd.concat(tables, ignore_index=True)\n",
+ " else:\n",
+ " df = tables\n",
+ "\n",
+ " if df is None or df.empty:\n",
+ " raise RuntimeError(\"No data returned from InfluxDB. Check your query parameters.\")\n",
+ "\n",
+ " # Standardize column names\n",
+ " df = df.rename(columns={\n",
+ " \"_time\": \"time\",\n",
+ " tag_slice_type: \"slice_type\",\n",
+ " tag_nssi_id: \"nssi_id\", # Renaming measObjLdn to nssi_id\n",
+ " field_prb_dl: \"prb_dl\",\n",
+ " field_data_dl: \"data_dl\",\n",
+ " field_rrc_succ: \"rrc_succ\"\n",
+ " })\n",
+ "\n",
+ " # Ensure types\n",
+ " df[\"time\"] = pd.to_datetime(df[\"time\"], utc=True)\n",
+ " df = df.sort_values([\"slice_type\", \"nssi_id\", \"time\"]).reset_index(drop=True)\n",
+ "\n",
+ " # Drop rows with any NA in core columns\n",
+ " df = df.dropna(subset=[\"slice_type\", \"nssi_id\", \"time\", \"prb_dl\", \"data_dl\", \"rrc_succ\"])\n",
+ "\n",
+ " return df[[\"time\", \"slice_type\", \"nssi_id\", \"prb_dl\", \"data_dl\", \"rrc_succ\"]]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Data Preparation Functions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def prepare_encoders_and_scalers(df: pd.DataFrame) -> Tuple[OneHotEncoder, OneHotEncoder, Dict[str, MinMaxScaler]]:\n",
+ " slice_enc = OneHotEncoder(handle_unknown=\"ignore\", sparse_output=False)\n",
+ " slice_enc.fit(df[[\"slice_type\"]])\n",
+ "\n",
+ " nssi_enc = OneHotEncoder(handle_unknown=\"ignore\", sparse_output=False)\n",
+ " nssi_enc.fit(df[[\"nssi_id\"]])\n",
+ "\n",
+ " scalers = {\n",
+ " \"prb\": MinMaxScaler(),\n",
+ " \"data\": MinMaxScaler(),\n",
+ " \"rrc\": MinMaxScaler(),\n",
+ " \"y\": MinMaxScaler()\n",
+ " }\n",
+ " scalers[\"prb\"].fit(df[[\"prb_dl\"]])\n",
+ " scalers[\"data\"].fit(df[[\"data_dl\"]])\n",
+ " scalers[\"rrc\"].fit(df[[\"rrc_succ\"]])\n",
+ " scalers[\"y\"].fit(df[[\"prb_dl\"]]) # target same as prb_dl\n",
+ "\n",
+ " return slice_enc, nssi_enc, scalers"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def make_sequences_per_slice(\n",
+ " df: pd.DataFrame,\n",
+ " window: int,\n",
+ " horizon: int,\n",
+ " target_column: str,\n",
+ " slice_enc: OneHotEncoder,\n",
+ " nssi_enc: OneHotEncoder,\n",
+ " scalers: Dict[str, MinMaxScaler]\n",
+ ") -> Tuple[np.ndarray, np.ndarray]:\n",
+ " \"\"\"\n",
+ " For each (slice_type, nssi_id) combination, use rolling windows to create sequences.\n",
+ " Features per timestep: [one-hot(slice_type), one-hot(nssi_id), scaled(prb_dl, data_dl, rrc_succ)]\n",
+ " Target: next-step prb_dl (scaled with y_scaler)\n",
+ " \"\"\"\n",
+ " X_list, y_list = [], []\n",
+ "\n",
+ " for (slice_type, nssi_id), g in df.groupby([\"slice_type\", \"nssi_id\"]):\n",
+ " g = g.sort_values(\"time\").reset_index(drop=True)\n",
+ "\n",
+ " if len(g) < window + horizon:\n",
+ " continue # Not enough data for this specific group to form a sequence\n",
+ "\n",
+ " # Prepare feature matrix for this group\n",
+ " # One-hot for slice_type (same for all rows in g)\n",
+ " slice_oh = slice_enc.transform(np.array([[slice_type]])) # shape (1, k_slice)\n",
+ " slice_oh_row = np.repeat(slice_oh, len(g), axis=0) # shape (len(g), k_slice)\n",
+ "\n",
+ " # One-hot for nssi_id (same for all rows in g)\n",
+ " nssi_oh = nssi_enc.transform(np.array([[nssi_id]])) # shape (1, k_nssi)\n",
+ " nssi_oh_row = np.repeat(nssi_oh, len(g), axis=0) # shape (len(g), k_nssi)\n",
+ "\n",
+ " # Scale numeric features\n",
+ " prb = scalers[\"prb\"].transform(g[[\"prb_dl\"]])\n",
+ " data = scalers[\"data\"].transform(g[[\"data_dl\"]])\n",
+ " rrc = scalers[\"rrc\"].transform(g[[\"rrc_succ\"]])\n",
+ "\n",
+ " feat = np.concatenate([slice_oh_row, nssi_oh_row, prb, data, rrc], axis=1) # shape (len(g), k_slice + k_nssi + 3)\n",
+ "\n",
+ " # Targets (scaled)\n",
+ " y_scaled = scalers[\"y\"].transform(g[[target_column]]) # prb_dl\n",
+ "\n",
+ " # Build window->horizon sequences\n",
+ " total_len = len(g)\n",
+ " for i in range(total_len - window - horizon + 1):\n",
+ " seq_x = feat[i:i+window, :] # (window, features)\n",
+ " target = y_scaled[i+window + horizon - 1, 0] # scalar\n",
+ " X_list.append(seq_x)\n",
+ " y_list.append(target)\n",
+ "\n",
+ " if not X_list:\n",
+ " raise RuntimeError(\"Not enough data to build sequences for any (slice_type, nssi_id) group. Try reducing --window or --horizon, or check data density per group.\")\n",
+ "\n",
+ " X = np.stack(X_list).astype(np.float32)\n",
+ " y = np.array(y_list).astype(np.float32).reshape(-1, 1)\n",
+ " return X, y"
+ ]
+ },
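+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "A minimal sketch of the indexing used above, on a toy series of 5 timesteps with `window=3` and `horizon=1`: each input covers timesteps `[i, i+window)`, the target is the value at `i + window + horizon - 1`, and a group yields `total_len - window - horizon + 1` sequences."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Toy illustration of the window/target indexing (demonstration only;\n",
+ "# the real run uses window=672 and horizon=1).\n",
+ "toy_window, toy_horizon = 3, 1\n",
+ "toy_values = np.arange(5)  # stand-in for one group's scaled prb_dl\n",
+ "for i in range(len(toy_values) - toy_window - toy_horizon + 1):\n",
+ "    seq = toy_values[i:i + toy_window]\n",
+ "    tgt = toy_values[i + toy_window + toy_horizon - 1]\n",
+ "    print(f\"inputs={seq.tolist()} -> target={tgt}\")"
+ ]
+ },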
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Model Building Functions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def build_model(input_shape: Tuple[int, int]) -> tf.keras.Model:\n",
+ " model = Sequential([\n",
+ " LSTM(64, return_sequences=True, input_shape=input_shape),\n",
+ " Dropout(0.2),\n",
+ " LSTM(32),\n",
+ " Dropout(0.2),\n",
+ " Dense(16, activation=\"relu\"),\n",
+ " Dense(1) # regression\n",
+ " ])\n",
+ " model.compile(loss=tf.keras.losses.Huber(delta=100), optimizer=\"adam\")\n",
+ " # model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),\n",
+ " # loss=\"mse\",\n",
+ " # metrics=[tf.keras.metrics.MeanAbsoluteError()])\n",
+ " return model"
+ ]
+ },
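+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For a quick look at the architecture before training, the cell below builds the model with an illustrative feature dimension of 10; the real dimension is `k_slice + k_nssi + 3` and is only known once the encoders are fitted."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Illustrative only: 10 is a placeholder feature dimension, not the\n",
+ "# value used in training (fit_model derives the real one from X_train).\n",
+ "build_model(input_shape=(window, 10)).summary()"
+ ]
+ },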
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Training and Utility Functions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def train_val_split_time(\n",
+ " df: pd.DataFrame,\n",
+ " val_split: float\n",
+ ") -> pd.Timestamp:\n",
+ " \"\"\"\n",
+ " Compute a time threshold so that approximately val_split of the data by time goes to validation.\n",
+ " \"\"\"\n",
+ " times = df[\"time\"].sort_values().unique()\n",
+ " if len(times) < 10:\n",
+ " # fallback\n",
+ " cutoff_idx = int(len(times) * (1 - val_split))\n",
+ " return pd.to_datetime(times[max(0, cutoff_idx-1)], utc=True)\n",
+ " cutoff_idx = int(len(times) * (1 - val_split))\n",
+ " cutoff_time = pd.to_datetime(times[cutoff_idx], utc=True)\n",
+ " return cutoff_time"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def fit_model(\n",
+ " X_train: np.ndarray, y_train: np.ndarray,\n",
+ " X_val: np.ndarray, y_val: np.ndarray,\n",
+ " model_dir: str,\n",
+ " epochs: int,\n",
+ " batch_size: int\n",
+ ") -> tf.keras.Model:\n",
+ " os.makedirs(model_dir, exist_ok=True)\n",
+ " ckpt_path = os.path.join(model_dir, \"best_prb_lstm.keras\")\n",
+ "\n",
+ " callbacks = [\n",
+ " EarlyStopping(monitor=\"val_loss\", patience=5, restore_best_weights=True),\n",
+ " ReduceLROnPlateau(monitor=\"val_loss\", factor=0.5, patience=3),\n",
+ " ModelCheckpoint(ckpt_path, monitor=\"val_loss\", save_best_only=True)\n",
+ " ]\n",
+ "\n",
+ " model = build_model(input_shape=(X_train.shape[1], X_train.shape[2]))\n",
+ " model.summary()\n",
+ " model.fit(\n",
+ " X_train, y_train,\n",
+ " validation_data=(X_val, y_val),\n",
+ " epochs=epochs,\n",
+ " batch_size=batch_size,\n",
+ " callbacks=callbacks,\n",
+ " verbose=1\n",
+ " )\n",
+ " # Save final as well\n",
+ " final_path = os.path.join(model_dir, \"final_prb_lstm.keras\")\n",
+ " model.save(final_path)\n",
+ " return model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def save_artifacts(model_dir: str, slice_enc: OneHotEncoder, nssi_enc: OneHotEncoder, scalers: Dict[str, MinMaxScaler],\n",
+ " meta: Dict):\n",
+ " os.makedirs(model_dir, exist_ok=True)\n",
+ " dump(slice_enc, os.path.join(model_dir, \"slice_onehot.joblib\"))\n",
+ " dump(nssi_enc, os.path.join(model_dir, \"nssi_onehot.joblib\"))\n",
+ " for k, sc in scalers.items():\n",
+ " dump(sc, os.path.join(model_dir, f\"scaler_{k}.joblib\"))\n",
+ " with open(os.path.join(model_dir, \"meta.json\"), \"w\") as f:\n",
+ " json.dump(meta, f, indent=2)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Execution Pipeline"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 1: Pull Data from InfluxDB"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 1) Pull data\n",
+ "df = fetch_from_influx()\n",
+ "print(f\"Data shape: {df.shape}\")\n",
+ "print(f\"Date range: {df['time'].min()} to {df['time'].max()}\")\n",
+ "print(f\"Unique slice types: {df['slice_type'].nunique()}\")\n",
+ "print(f\"Unique NSSI IDs: {df['nssi_id'].nunique()}\")\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 2: Prepare Encoders and Scalers"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 2) Prepare encoders/scalers\n",
+ "slice_enc, nssi_enc, scalers = prepare_encoders_and_scalers(df)\n",
+ "print(f\"Slice encoder categories: {slice_enc.categories_[0]}\")\n",
+ "print(f\"NSSI encoder categories: {len(nssi_enc.categories_[0])} unique IDs\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 3: Build Sequences"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 3) Build sequences\n",
+ "X, y = make_sequences_per_slice(\n",
+ " df, window, horizon,\n",
+ " target_column=\"prb_dl\",\n",
+ " slice_enc=slice_enc, nssi_enc=nssi_enc, scalers=scalers\n",
+ " )\n",
+ "print(f\"Sequences shape: X={X.shape}, y={y.shape}\")\n",
+ "print(f\"Feature dimension: {X.shape[2]}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 4: Train/Validation Split by Time"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 4) Train/val split by time: compute cutoff on original df time\n",
+ "cutoff_time = train_val_split_time(df, val_split=0.2)\n",
+ "print(f\"Validation cutoff time: {cutoff_time}\")\n",
+ "\n",
+ "# Map each sequence to an \"end time\" by using the window end indices\n",
+ "# This part needs to be consistent with how make_sequences_per_slice iterates\n",
+ "end_times = []\n",
+ "\n",
+ "for (slice_type, nssi_id), g in df.groupby([\"slice_type\", \"nssi_id\"]):\n",
+ " g = g.sort_values(\"time\").reset_index(drop=True)\n",
+ " total_len = len(g)\n",
+ " if total_len < window + horizon:\n",
+ " continue\n",
+ " for i in range(total_len - window - horizon + 1):\n",
+ " end_time = g.loc[i + window - 1, \"time\"]\n",
+ " end_times.append(end_time.value) # int ns since epoch\n",
+ "\n",
+ "end_times = np.array(end_times)\n",
+ "if len(end_times) != len(X):\n",
+ " # Fallback if mapping is problematic (e.g. if X is empty due to filtering in make_sequences)\n",
+ " print(\"[WARN] Mismatch in sequence count for time-based split. Using simple random split.\")\n",
+ " split_idx = int(0.8 * len(X))\n",
+ " mask_train = np.zeros(len(X), dtype=bool)\n",
+ " mask_train[:split_idx] = True\n",
+ " mask_val = ~mask_train\n",
+ "else:\n",
+ " mask_train = end_times < pd.Timestamp(cutoff_time).value\n",
+ " mask_val = ~mask_train\n",
+ " if mask_train.sum() == 0 or mask_val.sum() == 0:\n",
+ " print(\"[WARN] Time-based split resulted in empty train/val set. Using simple random split.\")\n",
+ " split_idx = int(0.8 * len(X))\n",
+ " mask_train = np.zeros(len(X), dtype=bool)\n",
+ " mask_train[:split_idx] = True\n",
+ " mask_val = ~mask_train\n",
+ "\n",
+ "X_train, y_train = X[mask_train], y[mask_train]\n",
+ "X_val, y_val = X[mask_val], y[mask_val]\n",
+ "\n",
+ "print(f\"Training set: {X_train.shape}\")\n",
+ "print(f\"Validation set: {X_val.shape}\")\n",
+ "print(f\"Train/Val ratio: {len(X_train)/(len(X_train)+len(X_val)):.2f}/{len(X_val)/(len(X_train)+len(X_val)):.2f}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 5: Train Model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 5) Train\n",
+ "model = fit_model(\n",
+ " X_train, y_train, X_val, y_val,\n",
+ " model_dir=\"models\",\n",
+ " epochs=20,\n",
+ " batch_size=64\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 6: Save Artifacts"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 6) Save artifacts\n",
+ "meta = {\n",
+ " \"window\": window,\n",
+ " \"horizon\": horizon,\n",
+ " \"features\": [\"onehot(slice_type)\", \"onehot(nssi_id)\", \"prb_dl\", \"data_dl\", \"rrc_succ\"],\n",
+ " \"target\": \"prb_dl\",\n",
+ " \"measurement\": measurement,\n",
+ " \"bucket\": bucket,\n",
+ " \"start\": start,\n",
+ " \"val_cutoff_time\": pd.to_datetime(cutoff_time).isoformat()\n",
+ " }\n",
+ "save_artifacts(\"models\", slice_enc, nssi_enc, scalers, meta)\n",
+ "print(\"Model artifacts saved to 'models' directory\")"
+ ]
+ },
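+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a sketch of the deployment side, the cell below reloads everything that was just written, assuming the same `models` directory; a serving rApp would do the equivalent at startup."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Reload the saved artifacts to verify they round-trip cleanly.\n",
+ "from joblib import load\n",
+ "\n",
+ "loaded_model = tf.keras.models.load_model(os.path.join(\"models\", \"final_prb_lstm.keras\"))\n",
+ "loaded_slice_enc = load(os.path.join(\"models\", \"slice_onehot.joblib\"))\n",
+ "loaded_nssi_enc = load(os.path.join(\"models\", \"nssi_onehot.joblib\"))\n",
+ "loaded_scalers = {k: load(os.path.join(\"models\", f\"scaler_{k}.joblib\")) for k in [\"prb\", \"data\", \"rrc\", \"y\"]}\n",
+ "with open(os.path.join(\"models\", \"meta.json\")) as f:\n",
+ "    loaded_meta = json.load(f)\n",
+ "print(\"Reloaded artifacts for window =\", loaded_meta[\"window\"])"
+ ]
+ },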
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 7: Evaluate Model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 7) Evaluate (denormalized MAE)\n",
+ "y_val_pred_scaled = model.predict(X_val).reshape(-1, 1)\n",
+ "y_val_pred = scalers[\"y\"].inverse_transform(y_val_pred_scaled)\n",
+ "y_val_true = scalers[\"y\"].inverse_transform(y_val)\n",
+ "\n",
+ "mae = np.mean(np.abs(y_val_true - y_val_pred))\n",
+ "print(f\"Validation MAE (in PRB units): {mae:.4f}\")\n",
+ "\n",
+ "rmse = root_mean_squared_error(y_val_true, y_val_pred)\n",
+ "print(\"RMSE:\", rmse)\n",
+ "\n",
+ "mae = mean_absolute_error(y_val_true, y_val_pred)\n",
+ "print(\"MAE:\", mae)\n",
+ "\n",
+ "r2 = r2_score(y_val_true, y_val_pred)\n",
+ "print(\"R2 Score:\", r2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Visualization\n",
+ "plt.figure(figsize=(12,6))\n",
+ "plt.plot(y_val_true[:200], label=\"Actual PRB Usage\", color=\"green\")\n",
+ "plt.plot(y_val_pred[:200], label=\"Forecasted PRB Usage\", color=\"red\")\n",
+ "plt.title(\"LSTM Prediction of PRB Usage\")\n",
+ "plt.xlabel(\"Time\")\n",
+ "plt.ylabel(\"PRB Usage (%)\")\n",
+ "plt.legend()\n",
+ "plt.grid(True)\n",
+ "\n",
+ "plt.savefig('lstm_forecast.jpeg', format='jpeg')\n",
+ "plt.close()\n",
+ "print(\"Forecast plot saved as 'lstm_forecast.jpeg'\")"
+ ]
+ },
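+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Finally, a sketch of single-step inference as a production rApp might run it: take the most recent `window` rows for one (slice_type, nssi_id) group, rebuild the same feature layout used in training, and inverse-transform the prediction back to PRB units."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Illustrative inference on the first group with enough history.\n",
+ "for (slice_type, nssi_id), g in df.groupby([\"slice_type\", \"nssi_id\"]):\n",
+ "    g = g.sort_values(\"time\").tail(window)\n",
+ "    if len(g) < window:\n",
+ "        continue\n",
+ "    # Rebuild the per-timestep features exactly as in make_sequences_per_slice.\n",
+ "    slice_oh = np.repeat(slice_enc.transform(np.array([[slice_type]])), window, axis=0)\n",
+ "    nssi_oh = np.repeat(nssi_enc.transform(np.array([[nssi_id]])), window, axis=0)\n",
+ "    feat = np.concatenate([\n",
+ "        slice_oh, nssi_oh,\n",
+ "        scalers[\"prb\"].transform(g[[\"prb_dl\"]]),\n",
+ "        scalers[\"data\"].transform(g[[\"data_dl\"]]),\n",
+ "        scalers[\"rrc\"].transform(g[[\"rrc_succ\"]])\n",
+ "    ], axis=1).astype(np.float32)\n",
+ "    pred_scaled = model.predict(feat[np.newaxis, ...], verbose=0)\n",
+ "    pred = scalers[\"y\"].inverse_transform(pred_scaled)[0, 0]\n",
+ "    print(f\"Next-step PRB forecast for {slice_type}/{nssi_id}: {pred:.2f}\")\n",
+ "    break"
+ ]
+ },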
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Summary\n",
+ "\n",
+ "The notebook has successfully:\n",
+ "1. ✅ Fetched NSSAI performance data from InfluxDB\n",
+ "2. ✅ Prepared encoders and scalers for data preprocessing\n",
+ "3. ✅ Created time series sequences for LSTM training\n",
+ "4. ✅ Split data into training and validation sets using time-based split\n",
+ "5. ✅ Trained LSTM model with early stopping and learning rate scheduling\n",
+ "6. ✅ Saved model artifacts (encoders, scalers, metadata) for deployment\n",
+ "7. ✅ Evaluated model performance and generated visualization\n",
+ "\n",
+ "The trained model and all preprocessing artifacts are now available in the `models` directory for use in production."
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}