--- /dev/null
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# RAN Slice PRB Prediction with Prophet\n",
+ "\n",
+ "This notebook deploys Facebook's Prophet time series forecasting library for PRB Prediction\n",
+ "\n",
+ "## Overview\n",
+ "\n",
+ "- Fetches NSSAI performance data from InfluxDB\n",
+ "- Preprocesses data for Prophet format\n",
+ "- Creates separate Prophet models for each slice type and NSSI combination\n",
+ "- Trains and evaluates Prophet models\n",
+ "- Saves model artifacts for deployment\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 1. Imports and Configuration"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Standard library imports\n",
+ "import os\n",
+ "import sys\n",
+ "import argparse\n",
+ "import json\n",
+ "import pickle\n",
+ "from datetime import datetime, timezone\n",
+ "from typing import Tuple, List, Dict, Optional\n",
+ "import warnings\n",
+ "warnings.filterwarnings('ignore')\n",
+ "\n",
+ "# Data manipulation and analysis\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "\n",
+ "# InfluxDB client\n",
+ "from influxdb_client import InfluxDBClient\n",
+ "from influxdb_client.client.flux_table import FluxStructureEncoder\n",
+ "from influxdb_client.client.write_api import SYNCHRONOUS\n",
+ "\n",
+ "# Machine learning utilities\n",
+ "from sklearn.preprocessing import MinMaxScaler, OneHotEncoder\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "from sklearn.metrics import mean_absolute_error, r2_score, root_mean_squared_error\n",
+ "\n",
+ "# Visualization\n",
+ "import matplotlib.pyplot as plt\n",
+ "import seaborn as sns\n",
+ "\n",
+ "# Prophet imports\n",
+ "from prophet import Prophet\n",
+ "from prophet.diagnostics import cross_validation, performance_metrics\n",
+ "from prophet.plot import plot_cross_validation_metric, plot_components_plotly, plot_plotly\n",
+ "\n",
+ "print(\"All imports completed successfully!\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 2. Configuration Parameters"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# InfluxDB Configuration\n",
+ "influx_url = \"http://localhost:8086\"\n",
+ "influx_org = \"srib\"\n",
+ "influx_token = \"WNwnbxZog226V4gIE6Mw37UevU7jv8O4jebwCFfSVQnsT5ER_q4RpYgf4nMM6dN2c4K6pgdbrgYOoRIOt82x7A==\"\n",
+ "bucket = \"nssi_pm_bucket\"\n",
+ "measurement = \"nssi_pm_bucket\"\n",
+ "start = \"-0\" # Time range (e.g., \"-30d\" for last 30 days)\n",
+ "\n",
+ "# Field and Tag Definitions\n",
+ "field_prb_dl = \"RRU.PrbDl.SNSSAI\"\n",
+ "field_data_dl = \"DRB.PdcpSduVolumeDL.SNSSAI\"\n",
+ "field_rrc_succ = \"RRC.ConnEstabSucc.Cause\"\n",
+ "tag_slice_type = \"sliceType\"\n",
+ "tag_nssi_id = \"measObjLdn\"\n",
+ "\n",
+ "# Model Parameters - FIXED FOR NEXT SLOT PREDICTION\n",
+ "forecast_periods = 10 # Number of periods to forecast ahead (next 2 slots)\n",
+ "freq = '15min' # 15-minute frequency to match the data generation interval\n",
+ "\n",
+ "# Model directory\n",
+ "model_dir = \"prophet_models\"\n",
+ "\n",
+ "print(\"Configuration parameters set!\")\n",
+ "print(f\"Forecast periods: {forecast_periods}\")\n",
+ "print(f\"Frequency: {freq}\")\n",
+ "print(f\"Model directory: {model_dir}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 3. Data Fetching Functions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def build_flux_query() -> str:\n",
+ " \"\"\"\n",
+ " Build a Flux query that:\n",
+ " - filters measurement\n",
+ " - keeps relevant fields\n",
+ " - pivots to a wide table: columns for prb_dl, data_dl, rrc_succ\n",
+ " - keeps slice_type and nssi_id as columns\n",
+ " \"\"\"\n",
+ "\n",
+ " field_names=[field_prb_dl, field_data_dl, field_rrc_succ]\n",
+ " fields_filter = \" or \".join([f'r[\"_field\"] == \"{f}\"' for f in field_names])\n",
+ " q = f'''\n",
+ "from(bucket: \"{bucket}\")\n",
+ " |> range(start: {start})\n",
+ " |> filter(fn: (r) => r[\"_measurement\"] == \"{measurement}\")\n",
+ " |> filter(fn: (r) => {fields_filter})\n",
+ " |> pivot(rowKey: [\"_time\"], columnKey: [\"_field\"], valueColumn: \"_value\")\n",
+ " |> keep(columns: [\"_time\", \"{tag_slice_type}\", \"{tag_nssi_id}\", \"{'\",\"'.join(field_names)}\"])\n",
+ " |> sort(columns: [\"_time\"])\n",
+ "'''\n",
+ "\n",
+ " return q\n",
+ "\n",
+ "def fetch_from_influx() -> pd.DataFrame:\n",
+ " \"\"\"Fetch data from InfluxDB and return as pandas DataFrame\"\"\"\n",
+ " client = InfluxDBClient(url=influx_url, token=influx_token, org=influx_org, timeout=60_000)\n",
+ " query_api = client.query_api()\n",
+ " flux = build_flux_query()\n",
+ " tables = query_api.query_data_frame(query=flux)\n",
+ " client.close()\n",
+ "\n",
+ " if isinstance(tables, list) and len(tables) > 0:\n",
+ " df = pd.concat(tables, ignore_index=True)\n",
+ " else:\n",
+ " df = tables\n",
+ "\n",
+ " if df is None or df.empty:\n",
+ " raise RuntimeError(\"No data returned from InfluxDB. Check your query parameters.\")\n",
+ "\n",
+ " # Standardize column names\n",
+ " df = df.rename(columns={\n",
+ " \"_time\": \"time\",\n",
+ " tag_slice_type: \"slice_type\",\n",
+ " tag_nssi_id: \"nssi_id\",\n",
+ " field_prb_dl: \"prb_dl\",\n",
+ " field_data_dl: \"data_dl\",\n",
+ " field_rrc_succ: \"rrc_succ\"\n",
+ " })\n",
+ "\n",
+ " # Ensure types\n",
+ " df[\"time\"] = pd.to_datetime(df[\"time\"], utc=True)\n",
+ " df = df.sort_values([\"slice_type\", \"nssi_id\", \"time\"]).reset_index(drop=True)\n",
+ "\n",
+ " # Drop rows with any NA in core columns\n",
+ " df = df.dropna(subset=[\"slice_type\", \"nssi_id\", \"time\", \"prb_dl\", \"data_dl\", \"rrc_succ\"])\n",
+ "\n",
+ " return df[[\"time\", \"slice_type\", \"nssi_id\", \"prb_dl\", \"data_dl\", \"rrc_succ\"]]\n",
+ "\n",
+ "print(\"Data fetching functions defined!\")"
+ ]
+ },
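+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Before running the query, it can help to eyeball the generated Flux. The optional cell below only prints the query string; it does not contact InfluxDB."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Optional sanity check: inspect the Flux query that will be sent to InfluxDB.\n",
+ "# This only prints the query string; no connection is made.\n",
+ "print(build_flux_query())"
+ ]
+ },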
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 4. Data Preparation Functions for Prophet"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def prepare_prophet_data(df: pd.DataFrame) -> Dict[str, Dict]:\n",
+ " \"\"\"\n",
+ " Prepare data for Prophet by creating separate DataFrames for each slice_type and nssi_id combination\n",
+ " \"\"\"\n",
+ " prophet_data = {}\n",
+ "\n",
+ " for (slice_type, nssi_id), group in df.groupby([\"slice_type\", \"nssi_id\"]):\n",
+ " # Sort by time\n",
+ " group = group.sort_values(\"time\").reset_index(drop=True)\n",
+ "\n",
+ " # Prepare Prophet format: ds (datetime) and y (target)\n",
+ " prophet_df = group[[\"time\", \"prb_dl\"]].copy()\n",
+ " prophet_df.columns = [\"ds\", \"y\"]\n",
+ "\n",
+ " # FIX: Remove timezone information from ds column for Prophet compatibility\n",
+ " # Prophet doesn't support timezone-aware datetimes in the 'ds' column\n",
+ " prophet_df[\"ds\"] = prophet_df[\"ds\"].dt.tz_localize(None)\n",
+ " \n",
+ " # Add additional regressors\n",
+ " prophet_df[\"data_dl\"] = group[\"data_dl\"].values\n",
+ " prophet_df[\"rrc_succ\"] = group[\"rrc_succ\"].values\n",
+ "\n",
+ " # Store with unique key\n",
+ " key = f\"{slice_type}_{nssi_id}\"\n",
+ " prophet_data[key] = {\n",
+ " \"data\": prophet_df,\n",
+ " \"slice_type\": slice_type,\n",
+ " \"nssi_id\": nssi_id\n",
+ " }\n",
+ "\n",
+ " return prophet_data\n",
+ "\n",
+ "print(\"Data preparation functions defined!\")"
+ ]
+ },
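+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "To make the expected Prophet input concrete, here is a minimal sketch on a tiny synthetic frame. The slice type `eMBB` and NSSI ID `nssi-1` are placeholder values, not real data."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Minimal illustration of the Prophet input contract produced by\n",
+ "# prepare_prophet_data(). All values below are synthetic placeholders.\n",
+ "toy = pd.DataFrame({\n",
+ "    \"time\": pd.date_range(\"2024-01-01\", periods=4, freq=\"15min\", tz=\"UTC\"),\n",
+ "    \"slice_type\": \"eMBB\",\n",
+ "    \"nssi_id\": \"nssi-1\",\n",
+ "    \"prb_dl\": [10.0, 12.0, 11.0, 13.0],\n",
+ "    \"data_dl\": [100.0, 120.0, 110.0, 130.0],\n",
+ "    \"rrc_succ\": [5.0, 6.0, 5.0, 7.0],\n",
+ "})\n",
+ "toy_prophet = prepare_prophet_data(toy)\n",
+ "display(toy_prophet[\"eMBB_nssi-1\"][\"data\"])  # note: 'ds' is timezone-naive"
+ ]
+ },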
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 5. Prophet Model Building Functions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def build_prophet_model(data: pd.DataFrame, include_regressors: bool = True) -> Prophet:\n",
+ " \"\"\"\n",
+ " Build a Prophet model with appropriate configurations\n",
+ " \"\"\"\n",
+ " model = Prophet(\n",
+ " yearly_seasonality=True,\n",
+ " weekly_seasonality=True,\n",
+ " daily_seasonality=True,\n",
+ " changepoint_prior_scale=0.05,\n",
+ " seasonality_prior_scale=10.0,\n",
+ " holidays_prior_scale=10.0,\n",
+ " mcmc_samples=0,\n",
+ " interval_width=0.8,\n",
+ " uncertainty_samples=1000\n",
+ " )\n",
+ "\n",
+ " # Add additional regressors if available\n",
+ " if include_regressors and \"data_dl\" in data.columns and \"rrc_succ\" in data.columns:\n",
+ " model.add_regressor(\"data_dl\", standardize=False)\n",
+ " model.add_regressor(\"rrc_succ\", standardize=False)\n",
+ "\n",
+ " return model\n",
+ "\n",
+ "def train_prophet_models(prophet_data: Dict[str, Dict], model_dir: str = model_dir) -> Dict[str, Prophet]:\n",
+ " \"\"\"\n",
+ " Train Prophet models for each slice_type and nssi_id combination\n",
+ " \"\"\"\n",
+ " os.makedirs(model_dir, exist_ok=True)\n",
+ " models = {}\n",
+ " training_metrics = {}\n",
+ "\n",
+ " for key, data_dict in prophet_data.items():\n",
+ " print(f\"Training Prophet model for {key}...\")\n",
+ "\n",
+ " df = data_dict[\"data\"]\n",
+ "\n",
+ " # Split data into train and validation (80-20 split)\n",
+ " split_idx = int(len(df) * 0.8)\n",
+ " train_df = df.iloc[:split_idx]\n",
+ " val_df = df.iloc[split_idx:]\n",
+ "\n",
+ " # Build and train model\n",
+ " model = build_prophet_model(train_df)\n",
+ "\n",
+ " # Fit the model\n",
+ " model.fit(train_df)\n",
+ "\n",
+ " # Make predictions on validation set\n",
+ " if len(val_df) > 0:\n",
+ " future_df = model.make_future_dataframe(periods=len(val_df), freq=freq, include_history=False)\n",
+ "\n",
+ " # Add regressor values to future dataframe\n",
+ " if \"data_dl\" in train_df.columns:\n",
+ " future_df[\"data_dl\"] = val_df[\"data_dl\"].values\n",
+ " if \"rrc_succ\" in train_df.columns:\n",
+ " future_df[\"rrc_succ\"] = val_df[\"rrc_succ\"].values\n",
+ "\n",
+ " forecast = model.predict(future_df)\n",
+ "\n",
+ " # Calculate metrics\n",
+ " y_true = val_df[\"y\"].values\n",
+ " y_pred = forecast[\"yhat\"].values[:len(y_true)]\n",
+ "\n",
+ " mae = mean_absolute_error(y_true, y_pred)\n",
+ " rmse = root_mean_squared_error(y_true, y_pred)\n",
+ " r2 = r2_score(y_true, y_pred)\n",
+ "\n",
+ " training_metrics[key] = {\n",
+ " \"mae\": mae,\n",
+ " \"rmse\": rmse,\n",
+ " \"r2\": r2,\n",
+ " \"train_samples\": len(train_df),\n",
+ " \"val_samples\": len(val_df)\n",
+ " }\n",
+ "\n",
+ " print(f\" MAE: {mae:.4f}, RMSE: {rmse:.4f}, R2: {r2:.4f}\")\n",
+ "\n",
+ " # Save model\n",
+ " model_path = os.path.join(model_dir, f\"prophet_model_{key}.pkl\")\n",
+ " with open(model_path, 'wb') as f:\n",
+ " pickle.dump(model, f)\n",
+ "\n",
+ " models[key] = model\n",
+ "\n",
+ " # Save training metrics\n",
+ " metrics_path = os.path.join(model_dir, \"training_metrics.json\")\n",
+ " with open(metrics_path, 'w') as f:\n",
+ " json.dump(training_metrics, f, indent=2)\n",
+ "\n",
+ " return models\n",
+ "\n",
+ "print(\"Prophet model building functions defined!\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 6. Prediction and Evaluation Functions - FIXED FOR NEXT SLOT PREDICTION"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def make_forecast(models: Dict[str, Prophet], prophet_data: Dict[str, Dict], \n",
+ " periods: int = forecast_periods, model_dir: str = model_dir) -> Dict[str, pd.DataFrame]:\n",
+ " \"\"\"\n",
+ " Make forecasts using trained Prophet models for the next time slots\n",
+ " \n",
+ " KEY FIX: This function now correctly predicts for the next time slots\n",
+ " starting from the last data point + 15 minutes\n",
+ " \"\"\"\n",
+ " forecasts = {}\n",
+ "\n",
+ " for key, model in models.items():\n",
+ " print(f\"Making forecast for {key}...\")\n",
+ "\n",
+ " # Get the latest data for this key\n",
+ " data_dict = prophet_data[key]\n",
+ " historical_data = data_dict[\"data\"]\n",
+ "\n",
+ " # Get the last timestamp from historical data\n",
+ " last_timestamp = historical_data[\"ds\"].max()\n",
+ " \n",
+ " # Create future dataframe starting from the last timestamp + 15min\n",
+ " future = model.make_future_dataframe(periods=periods, freq=freq, include_history=False)\n",
+ " \n",
+ " # Adjust the future dataframe to start from the next time slot\n",
+ " future_times = pd.date_range(\n",
+ " start=last_timestamp + pd.Timedelta(minutes=15),\n",
+ " periods=periods,\n",
+ " freq=freq\n",
+ " )\n",
+ " future[\"ds\"] = future_times\n",
+ "\n",
+ " # Add regressor values (for simplicity, we'll use the last known values)\n",
+ " if \"data_dl\" in historical_data.columns:\n",
+ " last_data_dl = historical_data[\"data_dl\"].iloc[-1]\n",
+ " future[\"data_dl\"] = last_data_dl\n",
+ "\n",
+ " if \"rrc_succ\" in historical_data.columns:\n",
+ " last_rrc_succ = historical_data[\"rrc_succ\"].iloc[-1]\n",
+ " future[\"rrc_succ\"] = last_rrc_succ\n",
+ "\n",
+ " # Make forecast\n",
+ " forecast = model.predict(future)\n",
+ "\n",
+ " # Save forecast\n",
+ " forecast_path = os.path.join(model_dir, f\"forecast_{key}.csv\")\n",
+ " forecast.to_csv(forecast_path, index=False)\n",
+ "\n",
+ " forecasts[key] = forecast\n",
+ "\n",
+ " # Print the forecast times for verification\n",
+ " print(f\" Forecast times for {key}:\")\n",
+ " for idx, row in forecast.iterrows():\n",
+ " print(f\" {row['ds']}: {row['yhat']:.2f}\")\n",
+ "\n",
+ " return forecasts\n",
+ "\n",
+ "print(\"Prediction and evaluation functions defined!\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 7. Visualization Functions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def plot_forecasts(forecasts: Dict[str, pd.DataFrame], prophet_data: Dict[str, Dict], \n",
+ " save_dir: str = model_dir):\n",
+ " \"\"\"\n",
+ " Plot forecasts for each model\n",
+ " \"\"\"\n",
+ " os.makedirs(save_dir, exist_ok=True)\n",
+ "\n",
+ " for key, forecast in forecasts.items():\n",
+ " plt.figure(figsize=(15, 10))\n",
+ "\n",
+ " # Plot actual vs predicted\n",
+ " data_dict = prophet_data[key]\n",
+ " historical_data = data_dict[\"data\"]\n",
+ "\n",
+ " # Plot historical data\n",
+ " plt.plot(historical_data[\"ds\"], historical_data[\"y\"], \n",
+ " label=\"Historical PRB Usage\", color=\"blue\", alpha=0.7)\n",
+ "\n",
+ " # Plot forecast\n",
+ " plt.plot(forecast[\"ds\"], forecast[\"yhat\"], \n",
+ " label=\"Forecasted PRB Usage\", color=\"red\", alpha=0.7)\n",
+ "\n",
+ " # Plot uncertainty intervals\n",
+ " plt.fill_between(forecast[\"ds\"], \n",
+ " forecast[\"yhat_lower\"], \n",
+ " forecast[\"yhat_upper\"], \n",
+ " color=\"red\", alpha=0.2, label=\"Uncertainty Interval\")\n",
+ "\n",
+ " plt.title(f\"Prophet Forecast for {key}\")\n",
+ " plt.xlabel(\"Time\")\n",
+ " plt.ylabel(\"PRB Usage\")\n",
+ " plt.legend()\n",
+ " plt.grid(True, alpha=0.3)\n",
+ " plt.xticks(rotation=45)\n",
+ " plt.tight_layout()\n",
+ "\n",
+ " # Save plot\n",
+ " plot_path = os.path.join(save_dir, f\"prophet_forecast_{key}.png\")\n",
+ " plt.savefig(plot_path, dpi=300, bbox_inches='tight')\n",
+ " plt.close()\n",
+ "\n",
+ " print(f\"Forecast plot saved for {key}\")\n",
+ "\n",
+ "print(\"Visualization functions defined!\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 8. Main Execution Pipeline\n",
+ "\n",
+ "### Step 1: Fetch Data from InfluxDB"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(\"=== Step 1: Fetching Data ===\")\n",
+ "\n",
+ "# Check if InfluxDB configuration is set\n",
+ "if not influx_url or not influx_token or not influx_org:\n",
+ " print(\"⚠️ Warning: InfluxDB configuration is not set!\")\n",
+ " print(\"Please set the following variables in the Configuration cell:\")\n",
+ " print(\"- influx_url\")\n",
+ " print(\"- influx_token\")\n",
+ " print(\"- influx_org\")\n",
+ " print(\"\\nFor demonstration purposes, you can load sample data instead.\")\n",
+ "else:\n",
+ " try:\n",
+ " df = fetch_from_influx()\n",
+ " print(f\"✅ Data shape: {df.shape}\")\n",
+ " print(f\"✅ Date range: {df['time'].min()} to {df['time'].max()}\")\n",
+ " print(f\"✅ Unique slice types: {df['slice_type'].nunique()}\")\n",
+ " print(f\"✅ Unique NSSI IDs: {df['nssi_id'].nunique()}\")\n",
+ "\n",
+ " # Display sample data\n",
+ " print(\"\\nSample data:\")\n",
+ " display(df.head())\n",
+ "\n",
+ " # Display data statistics\n",
+ " print(\"\\nData statistics:\")\n",
+ " display(df.describe())\n",
+ "\n",
+ " except Exception as e:\n",
+ " print(f\"❌ Error fetching data: {str(e)}\")\n",
+ " print(\"Please check your InfluxDB configuration and connection.\")"
+ ]
+ },
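+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Optional: Generate Synthetic Sample Data\n",
+ "\n",
+ "If InfluxDB is unavailable, the sketch below fabricates data in the same shape as `fetch_from_influx()` so the remaining steps can still be exercised. All slice types, NSSI IDs, and values are made up."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Optional fallback: synthetic data matching the schema returned by\n",
+ "# fetch_from_influx(). Everything here is fabricated for demonstration.\n",
+ "def make_sample_df(days: int = 7) -> pd.DataFrame:\n",
+ "    rng = np.random.default_rng(42)\n",
+ "    times = pd.date_range(\"2024-01-01\", periods=days * 96, freq=\"15min\", tz=\"UTC\")\n",
+ "    minutes = times.hour * 60 + times.minute\n",
+ "    frames = []\n",
+ "    for slice_type in [\"eMBB\", \"URLLC\"]:  # hypothetical slice types\n",
+ "        for nssi_id in [\"nssi-1\", \"nssi-2\"]:  # hypothetical NSSI IDs\n",
+ "            daily = 10 * np.sin(2 * np.pi * minutes / (24 * 60))  # daily cycle\n",
+ "            prb = rng.uniform(20, 60) + daily + rng.normal(0, 2, len(times))\n",
+ "            frames.append(pd.DataFrame({\n",
+ "                \"time\": times,\n",
+ "                \"slice_type\": slice_type,\n",
+ "                \"nssi_id\": nssi_id,\n",
+ "                \"prb_dl\": prb,\n",
+ "                \"data_dl\": prb * 50 + rng.normal(0, 10, len(times)),\n",
+ "                \"rrc_succ\": rng.integers(1, 20, len(times)).astype(float),\n",
+ "            }))\n",
+ "    return pd.concat(frames, ignore_index=True)\n",
+ "\n",
+ "# df = make_sample_df()  # uncomment only if Step 1 could not fetch real data"
+ ]
+ },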
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 2: Prepare Data for Prophet"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(\"=== Step 2: Preparing Prophet Data ===\")\n",
+ "\n",
+ "# Check if df exists from previous step\n",
+ "if 'df' in locals():\n",
+ " prophet_data = prepare_prophet_data(df)\n",
+ " print(f\"✅ Created {len(prophet_data)} Prophet datasets\")\n",
+ "\n",
+ " # Display information about prepared datasets\n",
+ " print(\"\\nProphet datasets summary:\")\n",
+ " for key, data_dict in prophet_data.items():\n",
+ " data_shape = data_dict['data'].shape\n",
+ " print(f\" {key}: {data_shape[0]} samples\")\n",
+ "\n",
+ " # Display sample of one dataset\n",
+ " sample_key = list(prophet_data.keys())[0]\n",
+ " print(f\"\\nSample data for {sample_key}:\")\n",
+ " display(prophet_data[sample_key]['data'].head())\n",
+ "else:\n",
+ " print(\"❌ No data available. Please complete Step 1 first.\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 3: Train Prophet Models"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(\"=== Step 3: Training Prophet Models ===\")\n",
+ "\n",
+ "# Check if prophet_data exists from previous step\n",
+ "if 'prophet_data' in locals():\n",
+ " models = train_prophet_models(prophet_data)\n",
+ " print(f\"✅ Trained {len(models)} Prophet models\")\n",
+ "\n",
+ " # Load and display training metrics\n",
+ " metrics_path = os.path.join(model_dir, \"training_metrics.json\")\n",
+ " if os.path.exists(metrics_path):\n",
+ " with open(metrics_path, 'r') as f:\n",
+ " training_metrics = json.load(f)\n",
+ "\n",
+ " print(\"\\nTraining Metrics:\")\n",
+ " metrics_df = pd.DataFrame(training_metrics).T\n",
+ " display(metrics_df)\n",
+ "\n",
+ " # Plot training metrics\n",
+ " fig, axes = plt.subplots(2, 2, figsize=(15, 10))\n",
+ " metrics_df['mae'].plot(kind='bar', ax=axes[0,0], title='Mean Absolute Error')\n",
+ " metrics_df['rmse'].plot(kind='bar', ax=axes[0,1], title='Root Mean Square Error')\n",
+ " metrics_df['r2'].plot(kind='bar', ax=axes[1,0], title='R² Score')\n",
+ " metrics_df[['train_samples', 'val_samples']].plot(kind='bar', ax=axes[1,1], title='Sample Sizes')\n",
+ "\n",
+ " plt.tight_layout()\n",
+ " plt.show()\n",
+ "else:\n",
+ " print(\"❌ No Prophet data available. Please complete Step 2 first.\")"
+ ]
+ },
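+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Since each model is pickled for deployment, the optional cell below sketches reloading one artifact. The key `eMBB_nssi-1` is a placeholder; substitute any key present in your `prophet_models` directory."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Optional sketch: reload a pickled model artifact, as a serving process would.\n",
+ "# The key below is a hypothetical placeholder; use a key from your own data.\n",
+ "example_key = \"eMBB_nssi-1\"\n",
+ "example_path = os.path.join(model_dir, f\"prophet_model_{example_key}.pkl\")\n",
+ "if os.path.exists(example_path):\n",
+ "    with open(example_path, 'rb') as f:\n",
+ "        reloaded_model = pickle.load(f)\n",
+ "    print(f\"Reloaded {type(reloaded_model).__name__} model from {example_path}\")\n",
+ "else:\n",
+ "    print(f\"No artifact at {example_path}; train models for your own keys first.\")"
+ ]
+ },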
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 4: Make Forecasts - NOW PREDICTS NEXT SLOTS CORRECTLY"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(\"=== Step 4: Making Forecasts ===\")\n",
+ "\n",
+ "# Check if models exist from previous step\n",
+ "if 'models' in locals():\n",
+ " forecasts = make_forecast(models, prophet_data)\n",
+ " print(f\"✅ Generated forecasts for {len(forecasts)} models\")\n",
+ "\n",
+ " # Display forecast summary\n",
+ " print(\"\\nForecast Summary:\")\n",
+ " for key, forecast in forecasts.items():\n",
+ " forecast_shape = forecast.shape\n",
+ " last_date = forecast['ds'].max()\n",
+ " print(f\" {key}: {forecast_shape[0]} total points, forecast until {last_date}\")\n",
+ "\n",
+ " # Display sample forecast for one model\n",
+ " sample_key = list(forecasts.keys())[0]\n",
+ " print(f\"\\nSample forecast for {sample_key}:\")\n",
+ " sample_forecast = forecasts[sample_key].tail(10) # Show last 10 forecast points\n",
+ " display(sample_forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']])\n",
+ "else:\n",
+ " print(\"❌ No trained models available. Please complete Step 3 first.\")"
+ ]
+ },
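+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a quick check on the next-slot fix, the optional cell below verifies that each forecast starts exactly one 15-minute slot after the last historical observation."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Optional check: each forecast should begin one `freq` step after the\n",
+ "# last observed timestamp for that slice/NSSI combination.\n",
+ "if 'forecasts' in locals():\n",
+ "    for key, forecast in forecasts.items():\n",
+ "        last_obs = prophet_data[key][\"data\"][\"ds\"].max()\n",
+ "        first_pred = forecast[\"ds\"].min()\n",
+ "        status = \"OK\" if first_pred - last_obs == pd.Timedelta(freq) else \"MISALIGNED\"\n",
+ "        print(f\"{key}: last obs {last_obs} -> first forecast {first_pred} [{status}]\")"
+ ]
+ },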
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 5: Create Visualizations"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(\"=== Step 5: Creating Visualizations ===\")\n",
+ "\n",
+ "# Check if forecasts exist from previous step\n",
+ "if 'forecasts' in locals():\n",
+ " plot_forecasts(forecasts, prophet_data)\n",
+ " print(\"✅ All forecast plots created and saved!\")\n",
+ "\n",
+ " # Display one of the generated plots inline\n",
+ " sample_key = list(forecasts.keys())[0]\n",
+ " plot_path = os.path.join(model_dir, f\"prophet_forecast_{sample_key}.png\")\n",
+ "\n",
+ " if os.path.exists(plot_path):\n",
+ " print(f\"\\nDisplaying forecast plot for {sample_key}:\")\n",
+ " from IPython.display import Image\n",
+ " display(Image(filename=plot_path))\n",
+ "else:\n",
+ " print(\"❌ No forecasts available. Please complete Step 4 first.\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 6: Save Metadata"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(\"=== Step 6: Saving Metadata ===\")\n",
+ "\n",
+ "# Check if we have the required data\n",
+ "if 'df' in locals() and 'models' in locals():\n",
+ " metadata = {\n",
+ " \"model_type\": \"Prophet\",\n",
+ " \"forecast_periods\": forecast_periods,\n",
+ " \"frequency\": freq,\n",
+ " \"total_models\": len(models),\n",
+ " \"data_range\": {\n",
+ " \"start\": df[\"time\"].min().isoformat(),\n",
+ " \"end\": df[\"time\"].max().isoformat()\n",
+ " },\n",
+ " \"slice_types\": df[\"slice_type\"].unique().tolist(),\n",
+ " \"nssi_ids\": df[\"nssi_id\"].unique().tolist(),\n",
+ " \"features\": [\"prb_dl\", \"data_dl\", \"rrc_succ\"],\n",
+ " \"target\": \"prb_dl\",\n",
+ " \"created_at\": datetime.now().isoformat(),\n",
+ " \"model_directory\": model_dir,\n",
+ " \"fix_applied\": \"Updated to predict next 15-minute slots instead of hourly predictions\"\n",
+ " }\n",
+ "\n",
+ " with open(os.path.join(model_dir, \"metadata.json\"), \"w\") as f:\n",
+ " json.dump(metadata, f, indent=2)\n",
+ "\n",
+ " print(\"✅ Metadata saved successfully!\")\n",
+ " print(\"\\nPipeline Metadata:\")\n",
+ " for key, value in metadata.items():\n",
+ " print(f\" {key}: {value}\")\n",
+ "\n",
+ " print(f\"\\n📁 All artifacts saved to '{model_dir}' directory:\")\n",
+ " if os.path.exists(model_dir):\n",
+ " files = os.listdir(model_dir)\n",
+ " for file in sorted(files):\n",
+ " print(f\" - {file}\")\n",
+ "else:\n",
+ " print(\"❌ Required data not available. Please complete previous steps first.\")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}