From 5f0b7dbfd640cfaa6c94bfad179d6458cc3254fe Mon Sep 17 00:00:00 2001
From: "karthikeyan.s"
Date: Fri, 5 Dec 2025 17:36:13 +0530
Subject: [PATCH] Jupyter notebook for RAN Slice PRB prediction using Prophet model

Change-Id: Icdb07b9436ebff869feef8ce56fb299620f6c275
Signed-off-by: karthikeyan.s
---
 .../RAN_Slice_Prophet_PRB_Prediction.ipynb | 736 +++++++++++++++++++++
 1 file changed, 736 insertions(+)
 create mode 100644 sample-rapp-generator/rapp-slice-prb-prediction/RAN_Slice_Prophet_PRB_Prediction.ipynb

diff --git a/sample-rapp-generator/rapp-slice-prb-prediction/RAN_Slice_Prophet_PRB_Prediction.ipynb b/sample-rapp-generator/rapp-slice-prb-prediction/RAN_Slice_Prophet_PRB_Prediction.ipynb
new file mode 100644
index 0000000..ddb8c3d
--- /dev/null
+++ b/sample-rapp-generator/rapp-slice-prb-prediction/RAN_Slice_Prophet_PRB_Prediction.ipynb
@@ -0,0 +1,736 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# RAN Slice PRB Prediction with Prophet\n",
+    "\n",
+    "This notebook uses Facebook's Prophet time series forecasting library to predict Physical Resource Block (PRB) usage per RAN slice.\n",
+    "\n",
+    "## Overview\n",
+    "\n",
+    "- Fetches NSSAI performance data from InfluxDB\n",
+    "- Preprocesses the data into Prophet's expected format\n",
+    "- Creates a separate Prophet model for each slice type and NSSI combination\n",
+    "- Trains and evaluates the Prophet models\n",
+    "- Saves model artifacts for deployment\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 1. Imports and Configuration"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Standard library imports\n",
+    "import os\n",
+    "import json\n",
+    "import pickle\n",
+    "from datetime import datetime\n",
+    "from typing import Dict\n",
+    "import warnings\n",
+    "warnings.filterwarnings('ignore')\n",
+    "\n",
+    "# Data manipulation and analysis\n",
+    "import pandas as pd\n",
+    "\n",
+    "# InfluxDB client\n",
+    "from influxdb_client import InfluxDBClient\n",
+    "\n",
+    "# Machine learning utilities (root_mean_squared_error requires scikit-learn >= 1.4)\n",
+    "from sklearn.metrics import mean_absolute_error, r2_score, root_mean_squared_error\n",
+    "\n",
+    "# Visualization\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "# Prophet imports\n",
+    "from prophet import Prophet\n",
+    "\n",
+    "print(\"All imports completed successfully!\")"
+   ]
+  },
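+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Optional sanity check: print the installed versions of the key dependencies, since `root_mean_squared_error` needs scikit-learn 1.4+ and Prophet behavior can vary across releases. The names below are PyPI distribution names."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Optional sanity check: print installed versions of key dependencies.\n",
+    "# The names below are PyPI distribution names; adjust if your environment differs.\n",
+    "from importlib.metadata import version, PackageNotFoundError\n",
+    "\n",
+    "for pkg in (\"prophet\", \"pandas\", \"scikit-learn\", \"influxdb-client\", \"matplotlib\"):\n",
+    "    try:\n",
+    "        print(f\"{pkg}: {version(pkg)}\")\n",
+    "    except PackageNotFoundError:\n",
+    "        print(f\"{pkg}: not installed\")"
+   ]
+  },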
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 2. Configuration Parameters"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# InfluxDB Configuration\n",
+    "# NOTE: avoid committing real tokens; see the environment override in the next cell\n",
+    "influx_url = \"http://localhost:8086\"\n",
+    "influx_org = \"srib\"\n",
+    "influx_token = \"WNwnbxZog226V4gIE6Mw37UevU7jv8O4jebwCFfSVQnsT5ER_q4RpYgf4nMM6dN2c4K6pgdbrgYOoRIOt82x7A==\"\n",
+    "bucket = \"nssi_pm_bucket\"\n",
+    "measurement = \"nssi_pm_bucket\"\n",
+    "start = \"0\"  # Flux range start: 0 = all data since the Unix epoch (use e.g. \"-30d\" for the last 30 days)\n",
+    "\n",
+    "# Field and Tag Definitions\n",
+    "field_prb_dl = \"RRU.PrbDl.SNSSAI\"\n",
+    "field_data_dl = \"DRB.PdcpSduVolumeDL.SNSSAI\"\n",
+    "field_rrc_succ = \"RRC.ConnEstabSucc.Cause\"\n",
+    "tag_slice_type = \"sliceType\"\n",
+    "tag_nssi_id = \"measObjLdn\"\n",
+    "\n",
+    "# Model Parameters\n",
+    "forecast_periods = 10  # Number of 15-minute slots to forecast ahead\n",
+    "freq = '15min'  # Frequency matching the data generation interval\n",
+    "\n",
+    "# Model directory\n",
+    "model_dir = \"prophet_models\"\n",
+    "\n",
+    "print(\"Configuration parameters set!\")\n",
+    "print(f\"Forecast periods: {forecast_periods}\")\n",
+    "print(f\"Frequency: {freq}\")\n",
+    "print(f\"Model directory: {model_dir}\")"
+   ]
+  },
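+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Optionally, the connection settings can be overridden from environment variables so real credentials never have to live in the notebook. The variable names `INFLUX_URL`, `INFLUX_ORG`, and `INFLUX_TOKEN` are illustrative, not an established convention."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Optional: override InfluxDB settings from environment variables.\n",
+    "# The variable names here are illustrative.\n",
+    "influx_url = os.environ.get(\"INFLUX_URL\", influx_url)\n",
+    "influx_org = os.environ.get(\"INFLUX_ORG\", influx_org)\n",
+    "influx_token = os.environ.get(\"INFLUX_TOKEN\", influx_token)\n",
+    "\n",
+    "print(f\"Using InfluxDB at {influx_url} (org: {influx_org})\")"
+   ]
+  },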
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 3. Data Fetching Functions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def build_flux_query() -> str:\n",
+    "    \"\"\"\n",
+    "    Build a Flux query that:\n",
+    "    - filters on the measurement\n",
+    "    - keeps the relevant fields\n",
+    "    - pivots to a wide table with columns for prb_dl, data_dl, rrc_succ\n",
+    "    - keeps slice_type and nssi_id as columns\n",
+    "    \"\"\"\n",
+    "    field_names = [field_prb_dl, field_data_dl, field_rrc_succ]\n",
+    "    fields_filter = \" or \".join([f'r[\"_field\"] == \"{f}\"' for f in field_names])\n",
+    "    keep_columns = \", \".join([f'\"{c}\"' for c in [\"_time\", tag_slice_type, tag_nssi_id] + field_names])\n",
+    "    q = f'''\n",
+    "from(bucket: \"{bucket}\")\n",
+    "  |> range(start: {start})\n",
+    "  |> filter(fn: (r) => r[\"_measurement\"] == \"{measurement}\")\n",
+    "  |> filter(fn: (r) => {fields_filter})\n",
+    "  |> pivot(rowKey: [\"_time\"], columnKey: [\"_field\"], valueColumn: \"_value\")\n",
+    "  |> keep(columns: [{keep_columns}])\n",
+    "  |> sort(columns: [\"_time\"])\n",
+    "'''\n",
+    "    return q\n",
+    "\n",
+    "def fetch_from_influx() -> pd.DataFrame:\n",
+    "    \"\"\"Fetch data from InfluxDB and return it as a pandas DataFrame.\"\"\"\n",
+    "    client = InfluxDBClient(url=influx_url, token=influx_token, org=influx_org, timeout=60_000)\n",
+    "    query_api = client.query_api()\n",
+    "    flux = build_flux_query()\n",
+    "    tables = query_api.query_data_frame(query=flux)\n",
+    "    client.close()\n",
+    "\n",
+    "    if isinstance(tables, list) and len(tables) > 0:\n",
+    "        df = pd.concat(tables, ignore_index=True)\n",
+    "    else:\n",
+    "        df = tables\n",
+    "\n",
+    "    if df is None or df.empty:\n",
+    "        raise RuntimeError(\"No data returned from InfluxDB. Check your query parameters.\")\n",
+    "\n",
+    "    # Standardize column names\n",
+    "    df = df.rename(columns={\n",
+    "        \"_time\": \"time\",\n",
+    "        tag_slice_type: \"slice_type\",\n",
+    "        tag_nssi_id: \"nssi_id\",\n",
+    "        field_prb_dl: \"prb_dl\",\n",
+    "        field_data_dl: \"data_dl\",\n",
+    "        field_rrc_succ: \"rrc_succ\"\n",
+    "    })\n",
+    "\n",
+    "    # Ensure types\n",
+    "    df[\"time\"] = pd.to_datetime(df[\"time\"], utc=True)\n",
+    "    df = df.sort_values([\"slice_type\", \"nssi_id\", \"time\"]).reset_index(drop=True)\n",
+    "\n",
+    "    # Drop rows with any NA in core columns\n",
+    "    df = df.dropna(subset=[\"slice_type\", \"nssi_id\", \"time\", \"prb_dl\", \"data_dl\", \"rrc_succ\"])\n",
+    "\n",
+    "    return df[[\"time\", \"slice_type\", \"nssi_id\", \"prb_dl\", \"data_dl\", \"rrc_succ\"]]\n",
+    "\n",
+    "print(\"Data fetching functions defined!\")"
+   ]
+  },
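+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Before querying InfluxDB, it can help to eyeball the generated Flux. The next cell simply prints the query that `fetch_from_influx()` will run."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Preview the Flux query that fetch_from_influx() will execute.\n",
+    "print(build_flux_query())"
+   ]
+  },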
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 4. Data Preparation Functions for Prophet"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def prepare_prophet_data(df: pd.DataFrame) -> Dict[str, Dict]:\n",
+    "    \"\"\"\n",
+    "    Prepare data for Prophet by creating a separate DataFrame for each\n",
+    "    slice_type and nssi_id combination.\n",
+    "    \"\"\"\n",
+    "    prophet_data = {}\n",
+    "\n",
+    "    for (slice_type, nssi_id), group in df.groupby([\"slice_type\", \"nssi_id\"]):\n",
+    "        # Sort by time\n",
+    "        group = group.sort_values(\"time\").reset_index(drop=True)\n",
+    "\n",
+    "        # Prepare Prophet format: ds (datetime) and y (target)\n",
+    "        prophet_df = group[[\"time\", \"prb_dl\"]].copy()\n",
+    "        prophet_df.columns = [\"ds\", \"y\"]\n",
+    "\n",
+    "        # Prophet does not support timezone-aware datetimes in the 'ds' column,\n",
+    "        # so strip the timezone information\n",
+    "        prophet_df[\"ds\"] = prophet_df[\"ds\"].dt.tz_localize(None)\n",
+    "\n",
+    "        # Add additional regressors\n",
+    "        prophet_df[\"data_dl\"] = group[\"data_dl\"].values\n",
+    "        prophet_df[\"rrc_succ\"] = group[\"rrc_succ\"].values\n",
+    "\n",
+    "        # Store with a unique key\n",
+    "        key = f\"{slice_type}_{nssi_id}\"\n",
+    "        prophet_data[key] = {\n",
+    "            \"data\": prophet_df,\n",
+    "            \"slice_type\": slice_type,\n",
+    "            \"nssi_id\": nssi_id\n",
+    "        }\n",
+    "\n",
+    "    return prophet_data\n",
+    "\n",
+    "print(\"Data preparation functions defined!\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 5. Prophet Model Building Functions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def build_prophet_model(data: pd.DataFrame, include_regressors: bool = True) -> Prophet:\n",
+    "    \"\"\"\n",
+    "    Build a Prophet model with appropriate configuration.\n",
+    "    \"\"\"\n",
+    "    model = Prophet(\n",
+    "        yearly_seasonality=True,\n",
+    "        weekly_seasonality=True,\n",
+    "        daily_seasonality=True,\n",
+    "        changepoint_prior_scale=0.05,\n",
+    "        seasonality_prior_scale=10.0,\n",
+    "        holidays_prior_scale=10.0,\n",
+    "        mcmc_samples=0,\n",
+    "        interval_width=0.8,\n",
+    "        uncertainty_samples=1000\n",
+    "    )\n",
+    "\n",
+    "    # Add additional regressors if available\n",
+    "    if include_regressors and \"data_dl\" in data.columns and \"rrc_succ\" in data.columns:\n",
+    "        model.add_regressor(\"data_dl\", standardize=False)\n",
+    "        model.add_regressor(\"rrc_succ\", standardize=False)\n",
+    "\n",
+    "    return model\n",
+    "\n",
+    "def train_prophet_models(prophet_data: Dict[str, Dict], model_dir: str = model_dir) -> Dict[str, Prophet]:\n",
+    "    \"\"\"\n",
+    "    Train one Prophet model per slice_type and nssi_id combination.\n",
+    "    \"\"\"\n",
+    "    os.makedirs(model_dir, exist_ok=True)\n",
+    "    models = {}\n",
+    "    training_metrics = {}\n",
+    "\n",
+    "    for key, data_dict in prophet_data.items():\n",
+    "        print(f\"Training Prophet model for {key}...\")\n",
+    "\n",
+    "        df = data_dict[\"data\"]\n",
+    "\n",
+    "        # Split data into train and validation sets (80-20 split)\n",
+    "        split_idx = int(len(df) * 0.8)\n",
+    "        train_df = df.iloc[:split_idx]\n",
+    "        val_df = df.iloc[split_idx:]\n",
+    "\n",
+    "        # Build and train the model\n",
+    "        model = build_prophet_model(train_df)\n",
+    "\n",
+    "        # Fit the model\n",
+    "        model.fit(train_df)\n",
+    "\n",
+    "        # Make predictions on the validation set\n",
+    "        if len(val_df) > 0:\n",
+    "            future_df = model.make_future_dataframe(periods=len(val_df), freq=freq, include_history=False)\n",
+    "\n",
+    "            # Add regressor values to the future dataframe\n",
+    "            if \"data_dl\" in train_df.columns:\n",
+    "                future_df[\"data_dl\"] = val_df[\"data_dl\"].values\n",
+    "            if \"rrc_succ\" in train_df.columns:\n",
+    "                future_df[\"rrc_succ\"] = val_df[\"rrc_succ\"].values\n",
+    "\n",
+    "            forecast = model.predict(future_df)\n",
+    "\n",
+    "            # Calculate metrics\n",
+    "            y_true = val_df[\"y\"].values\n",
+    "            y_pred = forecast[\"yhat\"].values[:len(y_true)]\n",
+    "\n",
+    "            mae = mean_absolute_error(y_true, y_pred)\n",
+    "            rmse = root_mean_squared_error(y_true, y_pred)\n",
+    "            r2 = r2_score(y_true, y_pred)\n",
+    "\n",
+    "            training_metrics[key] = {\n",
+    "                \"mae\": mae,\n",
+    "                \"rmse\": rmse,\n",
+    "                \"r2\": r2,\n",
+    "                \"train_samples\": len(train_df),\n",
+    "                \"val_samples\": len(val_df)\n",
+    "            }\n",
+    "\n",
+    "            print(f\"  MAE: {mae:.4f}, RMSE: {rmse:.4f}, R2: {r2:.4f}\")\n",
+    "\n",
+    "        # Save the model\n",
+    "        model_path = os.path.join(model_dir, f\"prophet_model_{key}.pkl\")\n",
+    "        with open(model_path, 'wb') as f:\n",
+    "            pickle.dump(model, f)\n",
+    "\n",
+    "        models[key] = model\n",
+    "\n",
+    "    # Save training metrics\n",
+    "    metrics_path = os.path.join(model_dir, \"training_metrics.json\")\n",
+    "    with open(metrics_path, 'w') as f:\n",
+    "        json.dump(training_metrics, f, indent=2)\n",
+    "\n",
+    "    return models\n",
+    "\n",
+    "print(\"Prophet model building functions defined!\")"
+   ]
+  },
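+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The helper below is an optional sketch of rolling-origin cross-validation using Prophet's `prophet.diagnostics` API. The `initial`, `period`, and `horizon` windows are illustrative and assume at least a few days of 15-minute history; shrink them for smaller datasets."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Optional sketch: rolling-origin cross-validation via Prophet diagnostics.\n",
+    "# Window sizes are illustrative; they assume a few days of 15-minute samples.\n",
+    "from prophet.diagnostics import cross_validation, performance_metrics\n",
+    "\n",
+    "def cross_validate_model(model: Prophet, initial: str = \"3 days\",\n",
+    "                         period: str = \"12 hours\", horizon: str = \"150 minutes\") -> pd.DataFrame:\n",
+    "    \"\"\"Run Prophet cross-validation and return the aggregated error metrics.\"\"\"\n",
+    "    df_cv = cross_validation(model, initial=initial, period=period, horizon=horizon)\n",
+    "    return performance_metrics(df_cv)\n",
+    "\n",
+    "print(\"Cross-validation helper defined!\")"
+   ]
+  },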
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 6. Prediction Functions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def make_forecast(models: Dict[str, Prophet], prophet_data: Dict[str, Dict],\n",
+    "                  periods: int = forecast_periods, model_dir: str = model_dir) -> Dict[str, pd.DataFrame]:\n",
+    "    \"\"\"\n",
+    "    Make forecasts with the trained Prophet models for the next `periods`\n",
+    "    time slots, starting 15 minutes after the last observed data point.\n",
+    "    \"\"\"\n",
+    "    forecasts = {}\n",
+    "\n",
+    "    for key, model in models.items():\n",
+    "        print(f\"Making forecast for {key}...\")\n",
+    "\n",
+    "        # Get the historical data for this key\n",
+    "        data_dict = prophet_data[key]\n",
+    "        historical_data = data_dict[\"data\"]\n",
+    "\n",
+    "        # Get the last timestamp from the historical data\n",
+    "        last_timestamp = historical_data[\"ds\"].max()\n",
+    "\n",
+    "        # Build the future dataframe starting from the next time slot\n",
+    "        future_times = pd.date_range(\n",
+    "            start=last_timestamp + pd.Timedelta(minutes=15),\n",
+    "            periods=periods,\n",
+    "            freq=freq\n",
+    "        )\n",
+    "        future = pd.DataFrame({\"ds\": future_times})\n",
+    "\n",
+    "        # Hold the last known regressor values constant over the horizon\n",
+    "        # (a simple assumption; real regressor forecasts could be used instead)\n",
+    "        if \"data_dl\" in historical_data.columns:\n",
+    "            future[\"data_dl\"] = historical_data[\"data_dl\"].iloc[-1]\n",
+    "\n",
+    "        if \"rrc_succ\" in historical_data.columns:\n",
+    "            future[\"rrc_succ\"] = historical_data[\"rrc_succ\"].iloc[-1]\n",
+    "\n",
+    "        # Make the forecast\n",
+    "        forecast = model.predict(future)\n",
+    "\n",
+    "        # Save the forecast\n",
+    "        forecast_path = os.path.join(model_dir, f\"forecast_{key}.csv\")\n",
+    "        forecast.to_csv(forecast_path, index=False)\n",
+    "\n",
+    "        forecasts[key] = forecast\n",
+    "\n",
+    "        # Print the forecast times for verification\n",
+    "        print(f\"  Forecast times for {key}:\")\n",
+    "        for idx, row in forecast.iterrows():\n",
+    "            print(f\"    {row['ds']}: {row['yhat']:.2f}\")\n",
+    "\n",
+    "    return forecasts\n",
+    "\n",
+    "print(\"Prediction functions defined!\")"
+   ]
+  },
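+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Downstream consumers (for example, an rApp control loop) typically only need the next slot's prediction per slice. The helper below is an illustrative sketch that flattens the first forecast row of each model into a plain dict; the payload shape is assumed for demonstration, not a defined interface."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Illustrative sketch: reduce each forecast to its next-slot prediction.\n",
+    "# The payload shape is an assumption for demonstration, not a defined interface.\n",
+    "def next_slot_predictions(forecasts: Dict[str, pd.DataFrame]) -> Dict[str, Dict]:\n",
+    "    \"\"\"Return the first forecasted slot per slice/NSSI key as a plain dict.\"\"\"\n",
+    "    result = {}\n",
+    "    for key, forecast in forecasts.items():\n",
+    "        row = forecast.iloc[0]\n",
+    "        result[key] = {\n",
+    "            \"ds\": row[\"ds\"].isoformat(),\n",
+    "            \"yhat\": float(row[\"yhat\"]),\n",
+    "            \"yhat_lower\": float(row[\"yhat_lower\"]),\n",
+    "            \"yhat_upper\": float(row[\"yhat_upper\"])\n",
+    "        }\n",
+    "    return result\n",
+    "\n",
+    "print(\"Next-slot payload helper defined!\")"
+   ]
+  },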
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 7. Visualization Functions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def plot_forecasts(forecasts: Dict[str, pd.DataFrame], prophet_data: Dict[str, Dict],\n",
+    "                   save_dir: str = model_dir):\n",
+    "    \"\"\"\n",
+    "    Plot the forecast for each model.\n",
+    "    \"\"\"\n",
+    "    os.makedirs(save_dir, exist_ok=True)\n",
+    "\n",
+    "    for key, forecast in forecasts.items():\n",
+    "        plt.figure(figsize=(15, 10))\n",
+    "\n",
+    "        # Plot actual vs. predicted\n",
+    "        data_dict = prophet_data[key]\n",
+    "        historical_data = data_dict[\"data\"]\n",
+    "\n",
+    "        # Plot historical data\n",
+    "        plt.plot(historical_data[\"ds\"], historical_data[\"y\"],\n",
+    "                 label=\"Historical PRB Usage\", color=\"blue\", alpha=0.7)\n",
+    "\n",
+    "        # Plot forecast\n",
+    "        plt.plot(forecast[\"ds\"], forecast[\"yhat\"],\n",
+    "                 label=\"Forecasted PRB Usage\", color=\"red\", alpha=0.7)\n",
+    "\n",
+    "        # Plot uncertainty intervals\n",
+    "        plt.fill_between(forecast[\"ds\"],\n",
+    "                         forecast[\"yhat_lower\"],\n",
+    "                         forecast[\"yhat_upper\"],\n",
+    "                         color=\"red\", alpha=0.2, label=\"Uncertainty Interval\")\n",
+    "\n",
+    "        plt.title(f\"Prophet Forecast for {key}\")\n",
+    "        plt.xlabel(\"Time\")\n",
+    "        plt.ylabel(\"PRB Usage\")\n",
+    "        plt.legend()\n",
+    "        plt.grid(True, alpha=0.3)\n",
+    "        plt.xticks(rotation=45)\n",
+    "        plt.tight_layout()\n",
+    "\n",
+    "        # Save the plot\n",
+    "        plot_path = os.path.join(save_dir, f\"prophet_forecast_{key}.png\")\n",
+    "        plt.savefig(plot_path, dpi=300, bbox_inches='tight')\n",
+    "        plt.close()\n",
+    "\n",
+    "        print(f\"Forecast plot saved for {key}\")\n",
+    "\n",
+    "print(\"Visualization functions defined!\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 8. Main Execution Pipeline\n",
+    "\n",
+    "### Step 1: Fetch Data from InfluxDB"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"=== Step 1: Fetching Data ===\")\n",
+    "\n",
+    "# Check that the InfluxDB configuration is set\n",
+    "if not influx_url or not influx_token or not influx_org:\n",
+    "    print(\"⚠️ Warning: InfluxDB configuration is not set!\")\n",
+    "    print(\"Please set the following variables in the Configuration cell:\")\n",
+    "    print(\"- influx_url\")\n",
+    "    print(\"- influx_token\")\n",
+    "    print(\"- influx_org\")\n",
+    "    print(\"\\nFor demonstration purposes, you can generate sample data instead (see the next cell).\")\n",
+    "else:\n",
+    "    try:\n",
+    "        df = fetch_from_influx()\n",
+    "        print(f\"✅ Data shape: {df.shape}\")\n",
+    "        print(f\"✅ Date range: {df['time'].min()} to {df['time'].max()}\")\n",
+    "        print(f\"✅ Unique slice types: {df['slice_type'].nunique()}\")\n",
+    "        print(f\"✅ Unique NSSI IDs: {df['nssi_id'].nunique()}\")\n",
+    "\n",
+    "        # Display sample data\n",
+    "        print(\"\\nSample data:\")\n",
+    "        display(df.head())\n",
+    "\n",
+    "        # Display data statistics\n",
+    "        print(\"\\nData statistics:\")\n",
+    "        display(df.describe())\n",
+    "\n",
+    "    except Exception as e:\n",
+    "        print(f\"❌ Error fetching data: {str(e)}\")\n",
+    "        print(\"Please check your InfluxDB configuration and connection.\")"
+   ]
+  },
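+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Optional fallback: if InfluxDB is unreachable, the next cell synthesizes a week of 15-minute demo data with the same schema `fetch_from_influx()` returns, so the rest of the pipeline can still be exercised. The slice types, NSSI IDs, and value ranges are made up for illustration; the cell only runs when Step 1 did not produce a `df`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Optional fallback: synthesize demo data with the schema of fetch_from_influx().\n",
+    "# Slice types, NSSI IDs, and value ranges below are illustrative only.\n",
+    "if 'df' not in locals():\n",
+    "    import numpy as np\n",
+    "\n",
+    "    rng = np.random.default_rng(42)\n",
+    "    times = pd.date_range(end=pd.Timestamp.now(tz=\"UTC\").floor(\"15min\"), periods=7 * 96, freq=\"15min\")\n",
+    "    rows = []\n",
+    "    for slice_type, nssi_id in [(\"eMBB\", \"nssi-1\"), (\"URLLC\", \"nssi-2\")]:\n",
+    "        base = rng.uniform(40, 60)\n",
+    "        daily = 15 * np.sin(2 * np.pi * (times.hour * 60 + times.minute) / (24 * 60))\n",
+    "        noise = rng.normal(0, 3, len(times))\n",
+    "        prb = np.clip(base + daily + noise, 0, 100)\n",
+    "        rows.append(pd.DataFrame({\n",
+    "            \"time\": times,\n",
+    "            \"slice_type\": slice_type,\n",
+    "            \"nssi_id\": nssi_id,\n",
+    "            \"prb_dl\": prb,\n",
+    "            \"data_dl\": prb * rng.uniform(8, 12),\n",
+    "            \"rrc_succ\": rng.integers(50, 200, len(times)).astype(float)\n",
+    "        }))\n",
+    "    df = pd.concat(rows, ignore_index=True)\n",
+    "    print(f\"Generated demo dataset: {df.shape}\")"
+   ]
+  },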
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step 2: Prepare Data for Prophet"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"=== Step 2: Preparing Prophet Data ===\")\n",
+    "\n",
+    "# Check that df exists from the previous step\n",
+    "if 'df' in locals():\n",
+    "    prophet_data = prepare_prophet_data(df)\n",
+    "    print(f\"✅ Created {len(prophet_data)} Prophet datasets\")\n",
+    "\n",
+    "    # Display information about the prepared datasets\n",
+    "    print(\"\\nProphet datasets summary:\")\n",
+    "    for key, data_dict in prophet_data.items():\n",
+    "        data_shape = data_dict['data'].shape\n",
+    "        print(f\"  {key}: {data_shape[0]} samples\")\n",
+    "\n",
+    "    # Display a sample of one dataset\n",
+    "    sample_key = list(prophet_data.keys())[0]\n",
+    "    print(f\"\\nSample data for {sample_key}:\")\n",
+    "    display(prophet_data[sample_key]['data'].head())\n",
+    "else:\n",
+    "    print(\"❌ No data available. Please complete Step 1 first.\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step 3: Train Prophet Models"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"=== Step 3: Training Prophet Models ===\")\n",
+    "\n",
+    "# Check that prophet_data exists from the previous step\n",
+    "if 'prophet_data' in locals():\n",
+    "    models = train_prophet_models(prophet_data)\n",
+    "    print(f\"✅ Trained {len(models)} Prophet models\")\n",
+    "\n",
+    "    # Load and display training metrics\n",
+    "    metrics_path = os.path.join(model_dir, \"training_metrics.json\")\n",
+    "    if os.path.exists(metrics_path):\n",
+    "        with open(metrics_path, 'r') as f:\n",
+    "            training_metrics = json.load(f)\n",
+    "\n",
+    "        print(\"\\nTraining Metrics:\")\n",
+    "        metrics_df = pd.DataFrame(training_metrics).T\n",
+    "        display(metrics_df)\n",
+    "\n",
+    "        # Plot training metrics\n",
+    "        fig, axes = plt.subplots(2, 2, figsize=(15, 10))\n",
+    "        metrics_df['mae'].plot(kind='bar', ax=axes[0,0], title='Mean Absolute Error')\n",
+    "        metrics_df['rmse'].plot(kind='bar', ax=axes[0,1], title='Root Mean Square Error')\n",
+    "        metrics_df['r2'].plot(kind='bar', ax=axes[1,0], title='R² Score')\n",
+    "        metrics_df[['train_samples', 'val_samples']].plot(kind='bar', ax=axes[1,1], title='Sample Sizes')\n",
+    "\n",
+    "        plt.tight_layout()\n",
+    "        plt.show()\n",
+    "else:\n",
+    "    print(\"❌ No Prophet data available. Please complete Step 2 first.\")"
+   ]
+  },
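+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Optional: apply the cross-validation helper from Section 5 to one freshly trained model. The cell is guarded so the notebook still runs top to bottom when training was skipped, and too-short histories are caught."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"=== Optional: Cross-Validation ===\")\n",
+    "\n",
+    "# Evaluate one trained model with the cross-validation helper from Section 5.\n",
+    "# The window sizes inside cross_validate_model are illustrative defaults.\n",
+    "if 'models' in locals() and len(models) > 0:\n",
+    "    sample_key = list(models.keys())[0]\n",
+    "    try:\n",
+    "        cv_metrics = cross_validate_model(models[sample_key])\n",
+    "        print(f\"Cross-validation metrics for {sample_key}:\")\n",
+    "        display(cv_metrics.head())\n",
+    "    except ValueError as e:\n",
+    "        print(f\"Cross-validation skipped: {e}\")\n",
+    "else:\n",
+    "    print(\"No trained models available; skipping cross-validation.\")"
+   ]
+  },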
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step 4: Make Forecasts"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"=== Step 4: Making Forecasts ===\")\n",
+    "\n",
+    "# Check that models exist from the previous step\n",
+    "if 'models' in locals():\n",
+    "    forecasts = make_forecast(models, prophet_data)\n",
+    "    print(f\"✅ Generated forecasts for {len(forecasts)} models\")\n",
+    "\n",
+    "    # Display forecast summary\n",
+    "    print(\"\\nForecast Summary:\")\n",
+    "    for key, forecast in forecasts.items():\n",
+    "        forecast_shape = forecast.shape\n",
+    "        last_date = forecast['ds'].max()\n",
+    "        print(f\"  {key}: {forecast_shape[0]} total points, forecast until {last_date}\")\n",
+    "\n",
+    "    # Display a sample forecast for one model\n",
+    "    sample_key = list(forecasts.keys())[0]\n",
+    "    print(f\"\\nSample forecast for {sample_key}:\")\n",
+    "    sample_forecast = forecasts[sample_key].tail(10)  # Show the last 10 forecast points\n",
+    "    display(sample_forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']])\n",
+    "else:\n",
+    "    print(\"❌ No trained models available. Please complete Step 3 first.\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step 5: Create Visualizations"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"=== Step 5: Creating Visualizations ===\")\n",
+    "\n",
+    "# Check that forecasts exist from the previous step\n",
+    "if 'forecasts' in locals():\n",
+    "    plot_forecasts(forecasts, prophet_data)\n",
+    "    print(\"✅ All forecast plots created and saved!\")\n",
+    "\n",
+    "    # Display one of the generated plots inline\n",
+    "    sample_key = list(forecasts.keys())[0]\n",
+    "    plot_path = os.path.join(model_dir, f\"prophet_forecast_{sample_key}.png\")\n",
+    "\n",
+    "    if os.path.exists(plot_path):\n",
+    "        print(f\"\\nDisplaying forecast plot for {sample_key}:\")\n",
+    "        from IPython.display import Image\n",
+    "        display(Image(filename=plot_path))\n",
+    "else:\n",
+    "    print(\"❌ No forecasts available. Please complete Step 4 first.\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step 6: Save Metadata"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"=== Step 6: Saving Metadata ===\")\n",
+    "\n",
+    "# Check that the required data is available\n",
+    "if 'df' in locals() and 'models' in locals():\n",
+    "    metadata = {\n",
+    "        \"model_type\": \"Prophet\",\n",
+    "        \"forecast_periods\": forecast_periods,\n",
+    "        \"frequency\": freq,\n",
+    "        \"total_models\": len(models),\n",
+    "        \"data_range\": {\n",
+    "            \"start\": df[\"time\"].min().isoformat(),\n",
+    "            \"end\": df[\"time\"].max().isoformat()\n",
+    "        },\n",
+    "        \"slice_types\": df[\"slice_type\"].unique().tolist(),\n",
+    "        \"nssi_ids\": df[\"nssi_id\"].unique().tolist(),\n",
+    "        \"features\": [\"prb_dl\", \"data_dl\", \"rrc_succ\"],\n",
+    "        \"target\": \"prb_dl\",\n",
+    "        \"created_at\": datetime.now().isoformat(),\n",
+    "        \"model_directory\": model_dir,\n",
+    "        \"notes\": \"Forecasts cover the next 15-minute slots, starting one slot after the last observation\"\n",
+    "    }\n",
+    "\n",
+    "    with open(os.path.join(model_dir, \"metadata.json\"), \"w\") as f:\n",
+    "        json.dump(metadata, f, indent=2)\n",
+    "\n",
+    "    print(\"✅ Metadata saved successfully!\")\n",
+    "    print(\"\\nPipeline Metadata:\")\n",
+    "    for key, value in metadata.items():\n",
+    "        print(f\"  {key}: {value}\")\n",
+    "\n",
+    "    print(f\"\\n📁 All artifacts saved to '{model_dir}' directory:\")\n",
+    "    if os.path.exists(model_dir):\n",
+    "        files = os.listdir(model_dir)\n",
+    "        for file in sorted(files):\n",
+    "            print(f\"  - {file}\")\n",
+    "else:\n",
+    "    print(\"❌ Required data not available. Please complete previous steps first.\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
-- 
2.16.6