{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# Import packages\n", "import plotly.express as px\n", "import pandas as pd\n", "\n", "# Import packages\n", "from scipy.stats import boxcox" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Read in the data\n", "data = pd.read_csv(\"../coal-price-data/AirPassengers.csv\")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Month#Passengers
01949-01112
11949-02118
21949-03132
31949-04129
41949-05121
.........
1391960-08606
1401960-09508
1411960-10461
1421960-11390
1431960-12432
\n", "

144 rows × 2 columns

\n", "
" ], "text/plain": [ " Month #Passengers\n", "0 1949-01 112\n", "1 1949-02 118\n", "2 1949-03 132\n", "3 1949-04 129\n", "4 1949-05 121\n", ".. ... ...\n", "139 1960-08 606\n", "140 1960-09 508\n", "141 1960-10 461\n", "142 1960-11 390\n", "143 1960-12 432\n", "\n", "[144 rows x 2 columns]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "data[\"Month\"] = pd.to_datetime(data[\"Month\"])" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Month#Passengers
01949-01-01112
11949-02-01118
21949-03-01132
31949-04-01129
41949-05-01121
.........
1391960-08-01606
1401960-09-01508
1411960-10-01461
1421960-11-01390
1431960-12-01432
\n", "

144 rows × 2 columns

\n", "
" ], "text/plain": [ " Month #Passengers\n", "0 1949-01-01 112\n", "1 1949-02-01 118\n", "2 1949-03-01 132\n", "3 1949-04-01 129\n", "4 1949-05-01 121\n", ".. ... ...\n", "139 1960-08-01 606\n", "140 1960-09-01 508\n", "141 1960-10-01 461\n", "142 1960-11-01 390\n", "143 1960-12-01 432\n", "\n", "[144 rows x 2 columns]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def plot_passenger_volumes(df: pd.DataFrame, y: str) -> None:\n", " \"\"\"General function to plot the passenger data.\"\"\"\n", "\n", " fig = px.line(df, x=\"Month\", y=y, labels={\"Month\": \"Date\"})\n", " fig.update_layout(\n", " template=\"simple_white\",\n", " font=dict(size=18),\n", " title_text=\"Airline Passengers\",\n", " width=650,\n", " title_x=0.5,\n", " height=400,\n", " )\n", "\n", " return fig.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Plot the airline passenger data\n", "plot_passenger_volumes(df=data, y=\"#Passengers\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df = data\n", "y = \"#Passengers\"" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "fig = px.line(df, x=\"Month\", y=y, labels={\"Month\": \"Date\"})\n", "fig.update_layout(\n", " template=\"simple_white\",\n", " font=dict(size=18),\n", " title_text=\"Airline Passengers\",\n", " width=650,\n", " title_x=0.5,\n", " height=400,\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(fig.show())" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Make the target stationary\n", "data[\"Passengers_boxcox\"], lam = boxcox(data[\"#Passengers\"])\n", "data[\"Passenger_stationary\"] = data[\"Passengers_boxcox\"].diff()\n", "data.dropna(inplace=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Plot the stationary airline passenger data\n", "plot_passenger_volumes(df=data, y=\"Passenger_stationary\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Import packages\n", "from statsmodels.tsa.stattools import adfuller\n", "\n", "\n", "# ADF test for stationary\n", "def adf_test(series):\n", " \"\"\"Using an ADF test to determine if a series is stationary\"\"\"\n", " test_results = adfuller(series)\n", " print(\"ADF Statistic: \", test_results[0])\n", " print(\"P-Value: \", test_results[1])\n", " print(\"Critical Values:\")\n", " for threshold, adf_stat in test_results[4].items():\n", " print(\"\\t%s: %.2f\" % (threshold, adf_stat))\n", "\n", "\n", "print(adf_test(data[\"Passenger_stationary\"]))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Import packages\n", "import matplotlib.pyplot as plt\n", "from statsmodels.graphics.tsaplots import plot_pacf\n", "\n", "# Plot partial autocorrelation\n", "plt.rc(\"figure\", figsize=(11, 5))\n", "plot_pacf(data[\"Passenger_stationary\"], method=\"ywm\")\n", "plt.xlabel(\"Lags\", fontsize=18)\n", "plt.ylabel(\"Correlation\", fontsize=18)\n", "plt.xticks(fontsize=18)\n", "plt.yticks(fontsize=18)\n", "plt.title(\"Partial Autocorrelation Plot\", fontsize=20)\n", "plt.tight_layout()\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Import packages\n", "from statsmodels.tsa.ar_model import AutoReg, ar_select_order\n", "\n", "# Split train and test\n", "train = data.iloc[: -int(len(data) * 0.2)]\n", "test = data.iloc[-int(len(data) * 0.2) :]\n", "\n", "# Build AR model\n", "selector = ar_select_order(train[\"Passenger_stationary\"], 15)\n", "model = AutoReg(train[\"Passenger_stationary\"], lags=selector.ar_lags).fit()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Import packages\n", "from scipy.special import inv_boxcox\n", "import plotly.graph_objects as go\n", "\n", "# Get forecasts and convert to actual passenger volumes\n", "transformed_forecasts = list(model.forecast(steps=len(test)))\n", "boxcox_forecasts = []\n", "for idx in range(len(test)):\n", " if idx == 0:\n", " boxcox_forecast = (\n", " transformed_forecasts[idx] + train[\"Passengers_boxcox\"].iloc[-1]\n", " )\n", " else:\n", " boxcox_forecast = transformed_forecasts[idx] + boxcox_forecasts[idx - 1]\n", "\n", " boxcox_forecasts.append(boxcox_forecast)\n", "\n", "forecasts = inv_boxcox(boxcox_forecasts, lam)\n", "\n", "\n", "def plot_forecasts(forecasts: list[float], title: str) -> None:\n", " \"\"\"Function to plot the forecasts.\"\"\"\n", " fig = go.Figure()\n", " fig.add_trace(\n", " go.Scatter(x=train[\"Month\"], y=train[\"#Passengers\"], name=\"Train\")\n", " )\n", " fig.add_trace(\n", " go.Scatter(x=test[\"Month\"], y=test[\"#Passengers\"], name=\"Test\")\n", " )\n", " fig.add_trace(go.Scatter(x=test[\"Month\"], y=forecasts, name=\"Forecast\"))\n", " fig.update_layout(\n", " template=\"simple_white\",\n", " font=dict(size=18),\n", " title_text=title,\n", " width=650,\n", " title_x=0.5,\n", " height=400,\n", " xaxis_title=\"Date\",\n", " yaxis_title=\"Passenger Volume\",\n", " )\n", "\n", " return fig.show()\n", "\n", "\n", "# Plot the forecasts\n", "plot_forecasts(forecasts, \"Autoregression\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "py311-kfp240-airflow251", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.5" } }, "nbformat": 4, "nbformat_minor": 2 }