--- /dev/null
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# RAN Slice PRB Prediction with Prophet\n",
+ "\n",
+ "This notebook deploys Facebook's Prophet time series forecasting library for PRB Prediction\n",
+ "\n",
+ "## Overview\n",
+ "\n",
+ "- Fetches NSSAI performance data from InfluxDB\n",
+ "- Preprocesses data for Prophet format\n",
+ "- Creates separate Prophet models for each slice type and NSSI combination\n",
+ "- Trains and evaluates Prophet models\n",
+ "- Saves model artifacts for deployment\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 1. Imports and Configuration"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Standard library imports\n",
+ "import os\n",
+ "import sys\n",
+ "import argparse\n",
+ "import json\n",
+ "import pickle\n",
+ "from datetime import datetime, timezone\n",
+ "from typing import Tuple, List, Dict, Optional\n",
+ "import warnings\n",
+ "warnings.filterwarnings('ignore')\n",
+ "\n",
+ "# Data manipulation and analysis\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "\n",
+ "# InfluxDB client\n",
+ "from influxdb_client import InfluxDBClient\n",
+ "from influxdb_client.client.flux_table import FluxStructureEncoder\n",
+ "from influxdb_client.client.write_api import SYNCHRONOUS\n",
+ "\n",
+ "# Machine learning utilities\n",
+ "from sklearn.preprocessing import MinMaxScaler, OneHotEncoder\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "from sklearn.metrics import mean_absolute_error, r2_score, root_mean_squared_error\n",
+ "\n",
+ "# Visualization\n",
+ "import matplotlib.pyplot as plt\n",
+ "import seaborn as sns\n",
+ "\n",
+ "# Prophet imports\n",
+ "from prophet import Prophet\n",
+ "from prophet.diagnostics import cross_validation, performance_metrics\n",
+ "from prophet.plot import plot_cross_validation_metric, plot_components_plotly, plot_plotly\n",
+ "\n",
+ "print(\"All imports completed successfully!\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 2. Configuration Parameters"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# InfluxDB Configuration\n",
+ "influx_url = \"http://localhost:8086\"\n",
+ "influx_org = \"srib\"\n",
+ "influx_token = \"WNwnbxZog226V4gIE6Mw37UevU7jv8O4jebwCFfSVQnsT5ER_q4RpYgf4nMM6dN2c4K6pgdbrgYOoRIOt82x7A==\"\n",
+ "bucket = \"nssi_pm_bucket\"\n",
+ "measurement = \"nssi_pm_bucket\"\n",
+ "start = \"-0\" # Time range (e.g., \"-30d\" for last 30 days)\n",
+ "\n",
+ "# Field and Tag Definitions\n",
+ "field_prb_dl = \"RRU.PrbDl.SNSSAI\"\n",
+ "field_data_dl = \"DRB.PdcpSduVolumeDL.SNSSAI\"\n",
+ "field_rrc_succ = \"RRC.ConnEstabSucc.Cause\"\n",
+ "tag_slice_type = \"sliceType\"\n",
+ "tag_nssi_id = \"measObjLdn\"\n",
+ "\n",
+ "# Model Parameters - FIXED FOR NEXT SLOT PREDICTION\n",
+ "forecast_periods = 10 # Number of periods to forecast ahead (next 2 slots)\n",
+ "freq = '15min' # 15-minute frequency to match the data generation interval\n",
+ "\n",
+ "# Model directory\n",
+ "model_dir = \"prophet_models\"\n",
+ "\n",
+ "print(\"Configuration parameters set!\")\n",
+ "print(f\"Forecast periods: {forecast_periods}\")\n",
+ "print(f\"Frequency: {freq}\")\n",
+ "print(f\"Model directory: {model_dir}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 3. Data Fetching Functions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def build_flux_query() -> str:\n",
+ " \"\"\"\n",
+ " Build a Flux query that:\n",
+ " - filters measurement\n",
+ " - keeps relevant fields\n",
+ " - pivots to a wide table: columns for prb_dl, data_dl, rrc_succ\n",
+ " - keeps slice_type and nssi_id as columns\n",
+ " \"\"\"\n",
+ "\n",
+ " field_names=[field_prb_dl, field_data_dl, field_rrc_succ]\n",
+ " fields_filter = \" or \".join([f'r[\"_field\"] == \"{f}\"' for f in field_names])\n",
+ " q = f'''\n",
+ "from(bucket: \"{bucket}\")\n",
+ " |> range(start: {start})\n",
+ " |> filter(fn: (r) => r[\"_measurement\"] == \"{measurement}\")\n",
+ " |> filter(fn: (r) => {fields_filter})\n",
+ " |> pivot(rowKey: [\"_time\"], columnKey: [\"_field\"], valueColumn: \"_value\")\n",
+ " |> keep(columns: [\"_time\", \"{tag_slice_type}\", \"{tag_nssi_id}\", \"{'\",\"'.join(field_names)}\"])\n",
+ " |> sort(columns: [\"_time\"])\n",
+ "'''\n",
+ "\n",
+ " return q\n",
+ "\n",
+ "def fetch_from_influx() -> pd.DataFrame:\n",
+ " \"\"\"Fetch data from InfluxDB and return as pandas DataFrame\"\"\"\n",
+ " client = InfluxDBClient(url=influx_url, token=influx_token, org=influx_org, timeout=60_000)\n",
+ " query_api = client.query_api()\n",
+ " flux = build_flux_query()\n",
+ " tables = query_api.query_data_frame(query=flux)\n",
+ " client.close()\n",
+ "\n",
+ " if isinstance(tables, list) and len(tables) > 0:\n",
+ " df = pd.concat(tables, ignore_index=True)\n",
+ " else:\n",
+ " df = tables\n",
+ "\n",
+ " if df is None or df.empty:\n",
+ " raise RuntimeError(\"No data returned from InfluxDB. Check your query parameters.\")\n",
+ "\n",
+ " # Standardize column names\n",
+ " df = df.rename(columns={\n",
+ " \"_time\": \"time\",\n",
+ " tag_slice_type: \"slice_type\",\n",
+ " tag_nssi_id: \"nssi_id\",\n",
+ " field_prb_dl: \"prb_dl\",\n",
+ " field_data_dl: \"data_dl\",\n",
+ " field_rrc_succ: \"rrc_succ\"\n",
+ " })\n",
+ "\n",
+ " # Ensure types\n",
+ " df[\"time\"] = pd.to_datetime(df[\"time\"], utc=True)\n",
+ " df = df.sort_values([\"slice_type\", \"nssi_id\", \"time\"]).reset_index(drop=True)\n",
+ "\n",
+ " # Drop rows with any NA in core columns\n",
+ " df = df.dropna(subset=[\"slice_type\", \"nssi_id\", \"time\", \"prb_dl\", \"data_dl\", \"rrc_succ\"])\n",
+ "\n",
+ " return df[[\"time\", \"slice_type\", \"nssi_id\", \"prb_dl\", \"data_dl\", \"rrc_succ\"]]\n",
+ "\n",
+ "print(\"Data fetching functions defined!\")"
+ ]
+ },
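+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Before running the query, it can help to eyeball the generated Flux. The optional cell below only prints the query string; it does not contact InfluxDB."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Optional sanity check: inspect the Flux query that will be sent to InfluxDB.\n",
+ "# This only prints the query string; no connection is made.\n",
+ "print(build_flux_query())"
+ ]
+ },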
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 4. Data Preparation Functions for Prophet"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def prepare_prophet_data(df: pd.DataFrame) -> Dict[str, Dict]:\n",
+ " \"\"\"\n",
+ " Prepare data for Prophet by creating separate DataFrames for each slice_type and nssi_id combination\n",
+ " \"\"\"\n",
+ " prophet_data = {}\n",
+ "\n",
+ " for (slice_type, nssi_id), group in df.groupby([\"slice_type\", \"nssi_id\"]):\n",
+ " # Sort by time\n",
+ " group = group.sort_values(\"time\").reset_index(drop=True)\n",
+ "\n",
+ " # Prepare Prophet format: ds (datetime) and y (target)\n",
+ " prophet_df = group[[\"time\", \"prb_dl\"]].copy()\n",
+ " prophet_df.columns = [\"ds\", \"y\"]\n",
+ "\n",
+ " # FIX: Remove timezone information from ds column for Prophet compatibility\n",
+ " # Prophet doesn't support timezone-aware datetimes in the 'ds' column\n",
+ " prophet_df[\"ds\"] = prophet_df[\"ds\"].dt.tz_localize(None)\n",
+ " \n",
+ " # Add additional regressors\n",
+ " prophet_df[\"data_dl\"] = group[\"data_dl\"].values\n",
+ " prophet_df[\"rrc_succ\"] = group[\"rrc_succ\"].values\n",
+ "\n",
+ " # Store with unique key\n",
+ " key = f\"{slice_type}_{nssi_id}\"\n",
+ " prophet_data[key] = {\n",
+ " \"data\": prophet_df,\n",
+ " \"slice_type\": slice_type,\n",
+ " \"nssi_id\": nssi_id\n",
+ " }\n",
+ "\n",
+ " return prophet_data\n",
+ "\n",
+ "print(\"Data preparation functions defined!\")"
+ ]
+ },
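+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "To make the expected Prophet input concrete, here is a minimal sketch on a tiny synthetic frame. The slice type `eMBB` and NSSI ID `nssi-1` are placeholder values, not real data."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Minimal illustration of the Prophet input contract produced by\n",
+ "# prepare_prophet_data(). All values below are synthetic placeholders.\n",
+ "toy = pd.DataFrame({\n",
+ "    \"time\": pd.date_range(\"2024-01-01\", periods=4, freq=\"15min\", tz=\"UTC\"),\n",
+ "    \"slice_type\": \"eMBB\",\n",
+ "    \"nssi_id\": \"nssi-1\",\n",
+ "    \"prb_dl\": [10.0, 12.0, 11.0, 13.0],\n",
+ "    \"data_dl\": [100.0, 120.0, 110.0, 130.0],\n",
+ "    \"rrc_succ\": [5.0, 6.0, 5.0, 7.0],\n",
+ "})\n",
+ "toy_prophet = prepare_prophet_data(toy)\n",
+ "display(toy_prophet[\"eMBB_nssi-1\"][\"data\"])  # note: 'ds' is timezone-naive"
+ ]
+ },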
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 5. Prophet Model Building Functions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def build_prophet_model(data: pd.DataFrame, include_regressors: bool = True) -> Prophet:\n",
+ " \"\"\"\n",
+ " Build a Prophet model with appropriate configurations\n",
+ " \"\"\"\n",
+ " model = Prophet(\n",
+ " yearly_seasonality=True,\n",
+ " weekly_seasonality=True,\n",
+ " daily_seasonality=True,\n",
+ " changepoint_prior_scale=0.05,\n",
+ " seasonality_prior_scale=10.0,\n",
+ " holidays_prior_scale=10.0,\n",
+ " mcmc_samples=0,\n",
+ " interval_width=0.8,\n",
+ " uncertainty_samples=1000\n",
+ " )\n",
+ "\n",
+ " # Add additional regressors if available\n",
+ " if include_regressors and \"data_dl\" in data.columns and \"rrc_succ\" in data.columns:\n",
+ " model.add_regressor(\"data_dl\", standardize=False)\n",
+ " model.add_regressor(\"rrc_succ\", standardize=False)\n",
+ "\n",
+ " return model\n",
+ "\n",
+ "def train_prophet_models(prophet_data: Dict[str, Dict], model_dir: str = model_dir) -> Dict[str, Prophet]:\n",
+ " \"\"\"\n",
+ " Train Prophet models for each slice_type and nssi_id combination\n",
+ " \"\"\"\n",
+ " os.makedirs(model_dir, exist_ok=True)\n",
+ " models = {}\n",
+ " training_metrics = {}\n",
+ "\n",
+ " for key, data_dict in prophet_data.items():\n",
+ " print(f\"Training Prophet model for {key}...\")\n",
+ "\n",
+ " df = data_dict[\"data\"]\n",
+ "\n",
+ " # Split data into train and validation (80-20 split)\n",
+ " split_idx = int(len(df) * 0.8)\n",
+ " train_df = df.iloc[:split_idx]\n",
+ " val_df = df.iloc[split_idx:]\n",
+ "\n",
+ " # Build and train model\n",
+ " model = build_prophet_model(train_df)\n",
+ "\n",
+ " # Fit the model\n",
+ " model.fit(train_df)\n",
+ "\n",
+ " # Make predictions on validation set\n",
+ " if len(val_df) > 0:\n",
+ " future_df = model.make_future_dataframe(periods=len(val_df), freq=freq, include_history=False)\n",
+ "\n",
+ " # Add regressor values to future dataframe\n",
+ " if \"data_dl\" in train_df.columns:\n",
+ " future_df[\"data_dl\"] = val_df[\"data_dl\"].values\n",
+ " if \"rrc_succ\" in train_df.columns:\n",
+ " future_df[\"rrc_succ\"] = val_df[\"rrc_succ\"].values\n",
+ "\n",
+ " forecast = model.predict(future_df)\n",
+ "\n",
+ " # Calculate metrics\n",
+ " y_true = val_df[\"y\"].values\n",
+ " y_pred = forecast[\"yhat\"].values[:len(y_true)]\n",
+ "\n",
+ " mae = mean_absolute_error(y_true, y_pred)\n",
+ " rmse = root_mean_squared_error(y_true, y_pred)\n",
+ " r2 = r2_score(y_true, y_pred)\n",
+ "\n",
+ " training_metrics[key] = {\n",
+ " \"mae\": mae,\n",
+ " \"rmse\": rmse,\n",
+ " \"r2\": r2,\n",
+ " \"train_samples\": len(train_df),\n",
+ " \"val_samples\": len(val_df)\n",
+ " }\n",
+ "\n",
+ " print(f\" MAE: {mae:.4f}, RMSE: {rmse:.4f}, R2: {r2:.4f}\")\n",
+ "\n",
+ " # Save model\n",
+ " model_path = os.path.join(model_dir, f\"prophet_model_{key}.pkl\")\n",
+ " with open(model_path, 'wb') as f:\n",
+ " pickle.dump(model, f)\n",
+ "\n",
+ " models[key] = model\n",
+ "\n",
+ " # Save training metrics\n",
+ " metrics_path = os.path.join(model_dir, \"training_metrics.json\")\n",
+ " with open(metrics_path, 'w') as f:\n",
+ " json.dump(training_metrics, f, indent=2)\n",
+ "\n",
+ " return models\n",
+ "\n",
+ "print(\"Prophet model building functions defined!\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 6. Prediction and Evaluation Functions - FIXED FOR NEXT SLOT PREDICTION"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def make_forecast(models: Dict[str, Prophet], prophet_data: Dict[str, Dict], \n",
+ " periods: int = forecast_periods, model_dir: str = model_dir) -> Dict[str, pd.DataFrame]:\n",
+ " \"\"\"\n",
+ " Make forecasts using trained Prophet models for the next time slots\n",
+ " \n",
+ " KEY FIX: This function now correctly predicts for the next time slots\n",
+ " starting from the last data point + 15 minutes\n",
+ " \"\"\"\n",
+ " forecasts = {}\n",
+ "\n",
+ " for key, model in models.items():\n",
+ " print(f\"Making forecast for {key}...\")\n",
+ "\n",
+ " # Get the latest data for this key\n",
+ " data_dict = prophet_data[key]\n",
+ " historical_data = data_dict[\"data\"]\n",
+ "\n",
+ " # Get the last timestamp from historical data\n",
+ " last_timestamp = historical_data[\"ds\"].max()\n",
+ " \n",
+ " # Create future dataframe starting from the last timestamp + 15min\n",
+ " future = model.make_future_dataframe(periods=periods, freq=freq, include_history=False)\n",
+ " \n",
+ " # Adjust the future dataframe to start from the next time slot\n",
+ " future_times = pd.date_range(\n",
+ " start=last_timestamp + pd.Timedelta(minutes=15),\n",
+ " periods=periods,\n",
+ " freq=freq\n",
+ " )\n",
+ " future[\"ds\"] = future_times\n",
+ "\n",
+ " # Add regressor values (for simplicity, we'll use the last known values)\n",
+ " if \"data_dl\" in historical_data.columns:\n",
+ " last_data_dl = historical_data[\"data_dl\"].iloc[-1]\n",
+ " future[\"data_dl\"] = last_data_dl\n",
+ "\n",
+ " if \"rrc_succ\" in historical_data.columns:\n",
+ " last_rrc_succ = historical_data[\"rrc_succ\"].iloc[-1]\n",
+ " future[\"rrc_succ\"] = last_rrc_succ\n",
+ "\n",
+ " # Make forecast\n",
+ " forecast = model.predict(future)\n",
+ "\n",
+ " # Save forecast\n",
+ " forecast_path = os.path.join(model_dir, f\"forecast_{key}.csv\")\n",
+ " forecast.to_csv(forecast_path, index=False)\n",
+ "\n",
+ " forecasts[key] = forecast\n",
+ "\n",
+ " # Print the forecast times for verification\n",
+ " print(f\" Forecast times for {key}:\")\n",
+ " for idx, row in forecast.iterrows():\n",
+ " print(f\" {row['ds']}: {row['yhat']:.2f}\")\n",
+ "\n",
+ " return forecasts\n",
+ "\n",
+ "print(\"Prediction and evaluation functions defined!\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 7. Visualization Functions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def plot_forecasts(forecasts: Dict[str, pd.DataFrame], prophet_data: Dict[str, Dict], \n",
+ " save_dir: str = model_dir):\n",
+ " \"\"\"\n",
+ " Plot forecasts for each model\n",
+ " \"\"\"\n",
+ " os.makedirs(save_dir, exist_ok=True)\n",
+ "\n",
+ " for key, forecast in forecasts.items():\n",
+ " plt.figure(figsize=(15, 10))\n",
+ "\n",
+ " # Plot actual vs predicted\n",
+ " data_dict = prophet_data[key]\n",
+ " historical_data = data_dict[\"data\"]\n",
+ "\n",
+ " # Plot historical data\n",
+ " plt.plot(historical_data[\"ds\"], historical_data[\"y\"], \n",
+ " label=\"Historical PRB Usage\", color=\"blue\", alpha=0.7)\n",
+ "\n",
+ " # Plot forecast\n",
+ " plt.plot(forecast[\"ds\"], forecast[\"yhat\"], \n",
+ " label=\"Forecasted PRB Usage\", color=\"red\", alpha=0.7)\n",
+ "\n",
+ " # Plot uncertainty intervals\n",
+ " plt.fill_between(forecast[\"ds\"], \n",
+ " forecast[\"yhat_lower\"], \n",
+ " forecast[\"yhat_upper\"], \n",
+ " color=\"red\", alpha=0.2, label=\"Uncertainty Interval\")\n",
+ "\n",
+ " plt.title(f\"Prophet Forecast for {key}\")\n",
+ " plt.xlabel(\"Time\")\n",
+ " plt.ylabel(\"PRB Usage\")\n",
+ " plt.legend()\n",
+ " plt.grid(True, alpha=0.3)\n",
+ " plt.xticks(rotation=45)\n",
+ " plt.tight_layout()\n",
+ "\n",
+ " # Save plot\n",
+ " plot_path = os.path.join(save_dir, f\"prophet_forecast_{key}.png\")\n",
+ " plt.savefig(plot_path, dpi=300, bbox_inches='tight')\n",
+ " plt.close()\n",
+ "\n",
+ " print(f\"Forecast plot saved for {key}\")\n",
+ "\n",
+ "print(\"Visualization functions defined!\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 8. Main Execution Pipeline\n",
+ "\n",
+ "### Step 1: Fetch Data from InfluxDB"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(\"=== Step 1: Fetching Data ===\")\n",
+ "\n",
+ "# Check if InfluxDB configuration is set\n",
+ "if not influx_url or not influx_token or not influx_org:\n",
+ " print(\"⚠️ Warning: InfluxDB configuration is not set!\")\n",
+ " print(\"Please set the following variables in the Configuration cell:\")\n",
+ " print(\"- influx_url\")\n",
+ " print(\"- influx_token\")\n",
+ " print(\"- influx_org\")\n",
+ " print(\"\\nFor demonstration purposes, you can load sample data instead.\")\n",
+ "else:\n",
+ " try:\n",
+ " df = fetch_from_influx()\n",
+ " print(f\"✅ Data shape: {df.shape}\")\n",
+ " print(f\"✅ Date range: {df['time'].min()} to {df['time'].max()}\")\n",
+ " print(f\"✅ Unique slice types: {df['slice_type'].nunique()}\")\n",
+ " print(f\"✅ Unique NSSI IDs: {df['nssi_id'].nunique()}\")\n",
+ "\n",
+ " # Display sample data\n",
+ " print(\"\\nSample data:\")\n",
+ " display(df.head())\n",
+ "\n",
+ " # Display data statistics\n",
+ " print(\"\\nData statistics:\")\n",
+ " display(df.describe())\n",
+ "\n",
+ " except Exception as e:\n",
+ " print(f\"❌ Error fetching data: {str(e)}\")\n",
+ " print(\"Please check your InfluxDB configuration and connection.\")"
+ ]
+ },
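+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Optional: Generate Synthetic Sample Data\n",
+ "\n",
+ "If InfluxDB is unavailable, the sketch below fabricates data in the same shape as `fetch_from_influx()` so the remaining steps can still be exercised. All slice types, NSSI IDs, and values are made up."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Optional fallback: synthetic data matching the schema returned by\n",
+ "# fetch_from_influx(). Everything here is fabricated for demonstration.\n",
+ "def make_sample_df(days: int = 7) -> pd.DataFrame:\n",
+ "    rng = np.random.default_rng(42)\n",
+ "    times = pd.date_range(\"2024-01-01\", periods=days * 96, freq=\"15min\", tz=\"UTC\")\n",
+ "    minutes = times.hour * 60 + times.minute\n",
+ "    frames = []\n",
+ "    for slice_type in [\"eMBB\", \"URLLC\"]:  # hypothetical slice types\n",
+ "        for nssi_id in [\"nssi-1\", \"nssi-2\"]:  # hypothetical NSSI IDs\n",
+ "            daily = 10 * np.sin(2 * np.pi * minutes / (24 * 60))  # daily cycle\n",
+ "            prb = rng.uniform(20, 60) + daily + rng.normal(0, 2, len(times))\n",
+ "            frames.append(pd.DataFrame({\n",
+ "                \"time\": times,\n",
+ "                \"slice_type\": slice_type,\n",
+ "                \"nssi_id\": nssi_id,\n",
+ "                \"prb_dl\": prb,\n",
+ "                \"data_dl\": prb * 50 + rng.normal(0, 10, len(times)),\n",
+ "                \"rrc_succ\": rng.integers(1, 20, len(times)).astype(float),\n",
+ "            }))\n",
+ "    return pd.concat(frames, ignore_index=True)\n",
+ "\n",
+ "# df = make_sample_df()  # uncomment only if Step 1 could not fetch real data"
+ ]
+ },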
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 2: Prepare Data for Prophet"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(\"=== Step 2: Preparing Prophet Data ===\")\n",
+ "\n",
+ "# Check if df exists from previous step\n",
+ "if 'df' in locals():\n",
+ " prophet_data = prepare_prophet_data(df)\n",
+ " print(f\"✅ Created {len(prophet_data)} Prophet datasets\")\n",
+ "\n",
+ " # Display information about prepared datasets\n",
+ " print(\"\\nProphet datasets summary:\")\n",
+ " for key, data_dict in prophet_data.items():\n",
+ " data_shape = data_dict['data'].shape\n",
+ " print(f\" {key}: {data_shape[0]} samples\")\n",
+ "\n",
+ " # Display sample of one dataset\n",
+ " sample_key = list(prophet_data.keys())[0]\n",
+ " print(f\"\\nSample data for {sample_key}:\")\n",
+ " display(prophet_data[sample_key]['data'].head())\n",
+ "else:\n",
+ " print(\"❌ No data available. Please complete Step 1 first.\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 3: Train Prophet Models"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(\"=== Step 3: Training Prophet Models ===\")\n",
+ "\n",
+ "# Check if prophet_data exists from previous step\n",
+ "if 'prophet_data' in locals():\n",
+ " models = train_prophet_models(prophet_data)\n",
+ " print(f\"✅ Trained {len(models)} Prophet models\")\n",
+ "\n",
+ " # Load and display training metrics\n",
+ " metrics_path = os.path.join(model_dir, \"training_metrics.json\")\n",
+ " if os.path.exists(metrics_path):\n",
+ " with open(metrics_path, 'r') as f:\n",
+ " training_metrics = json.load(f)\n",
+ "\n",
+ " print(\"\\nTraining Metrics:\")\n",
+ " metrics_df = pd.DataFrame(training_metrics).T\n",
+ " display(metrics_df)\n",
+ "\n",
+ " # Plot training metrics\n",
+ " fig, axes = plt.subplots(2, 2, figsize=(15, 10))\n",
+ " metrics_df['mae'].plot(kind='bar', ax=axes[0,0], title='Mean Absolute Error')\n",
+ " metrics_df['rmse'].plot(kind='bar', ax=axes[0,1], title='Root Mean Square Error')\n",
+ " metrics_df['r2'].plot(kind='bar', ax=axes[1,0], title='R² Score')\n",
+ " metrics_df[['train_samples', 'val_samples']].plot(kind='bar', ax=axes[1,1], title='Sample Sizes')\n",
+ "\n",
+ " plt.tight_layout()\n",
+ " plt.show()\n",
+ "else:\n",
+ " print(\"❌ No Prophet data available. Please complete Step 2 first.\")"
+ ]
+ },
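+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Since each model is pickled for deployment, the optional cell below sketches reloading one artifact. The key `eMBB_nssi-1` is a placeholder; substitute any key present in your `prophet_models` directory."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Optional sketch: reload a pickled model artifact, as a serving process would.\n",
+ "# The key below is a hypothetical placeholder; use a key from your own data.\n",
+ "example_key = \"eMBB_nssi-1\"\n",
+ "example_path = os.path.join(model_dir, f\"prophet_model_{example_key}.pkl\")\n",
+ "if os.path.exists(example_path):\n",
+ "    with open(example_path, 'rb') as f:\n",
+ "        reloaded_model = pickle.load(f)\n",
+ "    print(f\"Reloaded {type(reloaded_model).__name__} model from {example_path}\")\n",
+ "else:\n",
+ "    print(f\"No artifact at {example_path}; train models for your own keys first.\")"
+ ]
+ },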
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 4: Make Forecasts - NOW PREDICTS NEXT SLOTS CORRECTLY"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(\"=== Step 4: Making Forecasts ===\")\n",
+ "\n",
+ "# Check if models exist from previous step\n",
+ "if 'models' in locals():\n",
+ " forecasts = make_forecast(models, prophet_data)\n",
+ " print(f\"✅ Generated forecasts for {len(forecasts)} models\")\n",
+ "\n",
+ " # Display forecast summary\n",
+ " print(\"\\nForecast Summary:\")\n",
+ " for key, forecast in forecasts.items():\n",
+ " forecast_shape = forecast.shape\n",
+ " last_date = forecast['ds'].max()\n",
+ " print(f\" {key}: {forecast_shape[0]} total points, forecast until {last_date}\")\n",
+ "\n",
+ " # Display sample forecast for one model\n",
+ " sample_key = list(forecasts.keys())[0]\n",
+ " print(f\"\\nSample forecast for {sample_key}:\")\n",
+ " sample_forecast = forecasts[sample_key].tail(10) # Show last 10 forecast points\n",
+ " display(sample_forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']])\n",
+ "else:\n",
+ " print(\"❌ No trained models available. Please complete Step 3 first.\")"
+ ]
+ },
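+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a quick check on the next-slot fix, the optional cell below verifies that each forecast starts exactly one 15-minute slot after the last historical observation."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Optional check: each forecast should begin one `freq` step after the\n",
+ "# last observed timestamp for that slice/NSSI combination.\n",
+ "if 'forecasts' in locals():\n",
+ "    for key, forecast in forecasts.items():\n",
+ "        last_obs = prophet_data[key][\"data\"][\"ds\"].max()\n",
+ "        first_pred = forecast[\"ds\"].min()\n",
+ "        status = \"OK\" if first_pred - last_obs == pd.Timedelta(freq) else \"MISALIGNED\"\n",
+ "        print(f\"{key}: last obs {last_obs} -> first forecast {first_pred} [{status}]\")"
+ ]
+ },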
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 5: Create Visualizations"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(\"=== Step 5: Creating Visualizations ===\")\n",
+ "\n",
+ "# Check if forecasts exist from previous step\n",
+ "if 'forecasts' in locals():\n",
+ " plot_forecasts(forecasts, prophet_data)\n",
+ " print(\"✅ All forecast plots created and saved!\")\n",
+ "\n",
+ " # Display one of the generated plots inline\n",
+ " sample_key = list(forecasts.keys())[0]\n",
+ " plot_path = os.path.join(model_dir, f\"prophet_forecast_{sample_key}.png\")\n",
+ "\n",
+ " if os.path.exists(plot_path):\n",
+ " print(f\"\\nDisplaying forecast plot for {sample_key}:\")\n",
+ " from IPython.display import Image\n",
+ " display(Image(filename=plot_path))\n",
+ "else:\n",
+ " print(\"❌ No forecasts available. Please complete Step 4 first.\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 6: Save Metadata"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(\"=== Step 6: Saving Metadata ===\")\n",
+ "\n",
+ "# Check if we have the required data\n",
+ "if 'df' in locals() and 'models' in locals():\n",
+ " metadata = {\n",
+ " \"model_type\": \"Prophet\",\n",
+ " \"forecast_periods\": forecast_periods,\n",
+ " \"frequency\": freq,\n",
+ " \"total_models\": len(models),\n",
+ " \"data_range\": {\n",
+ " \"start\": df[\"time\"].min().isoformat(),\n",
+ " \"end\": df[\"time\"].max().isoformat()\n",
+ " },\n",
+ " \"slice_types\": df[\"slice_type\"].unique().tolist(),\n",
+ " \"nssi_ids\": df[\"nssi_id\"].unique().tolist(),\n",
+ " \"features\": [\"prb_dl\", \"data_dl\", \"rrc_succ\"],\n",
+ " \"target\": \"prb_dl\",\n",
+ " \"created_at\": datetime.now().isoformat(),\n",
+ " \"model_directory\": model_dir,\n",
+ " \"fix_applied\": \"Updated to predict next 15-minute slots instead of hourly predictions\"\n",
+ " }\n",
+ "\n",
+ " with open(os.path.join(model_dir, \"metadata.json\"), \"w\") as f:\n",
+ " json.dump(metadata, f, indent=2)\n",
+ "\n",
+ " print(\"✅ Metadata saved successfully!\")\n",
+ " print(\"\\nPipeline Metadata:\")\n",
+ " for key, value in metadata.items():\n",
+ " print(f\" {key}: {value}\")\n",
+ "\n",
+ " print(f\"\\n📁 All artifacts saved to '{model_dir}' directory:\")\n",
+ " if os.path.exists(model_dir):\n",
+ " files = os.listdir(model_dir)\n",
+ " for file in sorted(files):\n",
+ " print(f\" - {file}\")\n",
+ "else:\n",
+ " print(\"❌ Required data not available. Please complete previous steps first.\")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}