metric-value">12.89
import pandas as pd
import numpy as np
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
# Create sample time series
np.random.seed(42)
dates = pd.date_range('2020-01-01', '2023-12-31', freq='D')
trend = np.linspace(100, 400, len(dates))
seasonal = 50 * np.sin(2 * np.pi * np.arange(len(dates)) / 365.25)
noise = np.random.randn(len(dates)) * 10
ts = pd.Series(trend + seasonal + noise, index=dates)
# Split into train and test
train_size = int(len(ts) * 0.8)
train, test = ts[:train_size], ts[train_size:]
print(f"Train set: {len(train)} observations")
print(f"Test set: {len(test)} observations")
# Forecasting Methods
#####################
# 1. Naive Methods
##################
# Last value (Naive)
naive_forecast = pd.Series([train.iloc[-1]] * len(test), index=test.index)
# Seasonal Naive
seasonal_period = 365
seasonal_naive = train.iloc[-seasonal_period:].values
seasonal_naive_forecast = pd.Series(
np.tile(seasonal_naive, len(test) // seasonal_period + 1)[:len(test)],
index=test.index
)
# Average method
average_forecast = pd.Series([train.mean()] * len(test), index=test.index)
# 2. Moving Average Forecast
#############################
def moving_average_forecast(train, test, window=30):
"""Simple moving average forecast"""
forecast = []
history = list(train)
for _ in range(len(test)):
ma = np.mean(history[-window:])
forecast.append(ma)
# In practice, would add actual observation
history.append(ma)
return pd.Series(forecast, index=test.index)
ma_forecast = moving_average_forecast(train, test)
# 3. Exponential Smoothing
##########################
# Simple Exponential Smoothing
from statsmodels.tsa.holtwinters import SimpleExpSmoothing
ses_model = SimpleExpSmoothing(train)
ses_fit = ses_model.fit(smoothing_level=0.2, optimized=False)
ses_forecast = ses_fit.forecast(len(test))
# Double Exponential Smoothing (Holt's method)
from statsmodels.tsa.holtwinters import Holt
holt_model = Holt(train)
holt_fit = holt_model.fit()
holt_forecast = holt_fit.forecast(len(test))
# Triple Exponential Smoothing (Holt-Winters)
hw_model = ExponentialSmoothing(
train,
seasonal_periods=365,
trend='add',
seasonal='add',
damped_trend=True
)
hw_fit = hw_model.fit()
hw_forecast = hw_fit.forecast(len(test))
# 4. ARIMA Models
#################
# Auto ARIMA (find best parameters)
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
# Check stationarity and difference if needed
adf_result = adfuller(train)
if adf_result[1] > 0.05:
train_diff = train.diff().dropna()
d = 1
else:
train_diff = train
d = 0
# Fit ARIMA model
arima_model = ARIMA(train, order=(2, d, 2)) # (p, d, q)
arima_fit = arima_model.fit()
arima_forecast = arima_fit.forecast(steps=len(test))
# SARIMA (Seasonal ARIMA)
from statsmodels.tsa.statespace.sarimax import SARIMAX
sarima_model = SARIMAX(
train,
order=(1, 1, 1),
seasonal_order=(1, 1, 1, 365),
enforce_stationarity=False,
enforce_invertibility=False
)
sarima_fit = sarima_model.fit(disp=False)
sarima_forecast = sarima_fit.forecast(steps=len(test))
# 5. Prophet (Facebook's forecasting tool)
###########################################
from prophet import Prophet
# Prepare data for Prophet
prophet_train = pd.DataFrame({
'ds': train.index,
'y': train.values
})
# Fit Prophet model
prophet_model = Prophet(
yearly_seasonality=True,
weekly_seasonality=False,
daily_seasonality=False,
changepoint_prior_scale=0.05
)
prophet_model.fit(prophet_train)
# Make future dataframe
future = prophet_model.make_future_dataframe(periods=len(test), freq='D')
prophet_pred = prophet_model.predict(future)
prophet_forecast = prophet_pred.loc[prophet_pred['ds'].isin(test.index), 'yhat'].values
# Model Evaluation
##################
def evaluate_forecast(actual, forecast, model_name):
"""Calculate forecast metrics"""
mae = mean_absolute_error(actual, forecast)
rmse = np.sqrt(mean_squared_error(actual, forecast))
mape = np.mean(np.abs((actual - forecast) / actual)) * 100
r2 = r2_score(actual, forecast)
return {
'Model': model_name,
'MAE': mae,
'RMSE': rmse,
'MAPE': mape,
'R2': r2
}
# Evaluate all models
results = []
models = {
'Naive': naive_forecast,
'Seasonal Naive': seasonal_naive_forecast[:len(test)],
'Moving Average': ma_forecast,
'Simple Exp Smoothing': ses_forecast,
'Holt': holt_forecast,
'Holt-Winters': hw_forecast,
'ARIMA': arima_forecast
}
for name, forecast in models.items():
if len(forecast) == len(test):
results.append(evaluate_forecast(test.values, forecast.values, name))
results_df = pd.DataFrame(results)
results_df = results_df.sort_values('RMSE')
print("\nModel Performance Comparison:")
print(results_df.to_string(index=False))
# Visualization
###############
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
# Plot 1: All forecasts comparison
ax1 = axes[0, 0]
train.plot(ax=ax1, label='Training Data', color='blue', alpha=0.7)
test.plot(ax=ax1, label='Actual', color='black', linewidth=2)
ma_forecast.plot(ax=ax1, label='Moving Average', alpha=0.7)
holt_forecast.plot(ax=ax1, label='Holt', alpha=0.7)
hw_forecast.plot(ax=ax1, label='Holt-Winters', alpha=0.7)
ax1.set_title('Forecast Comparison')
ax1.legend(loc='upper left')
ax1.grid(True, alpha=0.3)
# Plot 2: Best model forecast with confidence intervals
ax2 = axes[0, 1]
best_model_name = results_df.iloc[0]['Model']
best_forecast = models[best_model_name]
train.plot(ax=ax2, label='Training Data', color='blue', alpha=0.7)
test.plot(ax=ax2, label='Actual', color='black', linewidth=2)
best_forecast.plot(ax=ax2, label=f'Best: {best_model_name}', color='red', linewidth=2)
# Add confidence intervals (example for ARIMA)
if best_model_name == 'ARIMA':
forecast_result = arima_fit.get_forecast(steps=len(test))
conf_int = forecast_result.conf_int()
ax2.fill_between(test.index,
conf_int.iloc[:, 0],
conf_int.iloc[:, 1],
alpha=0.2, color='red')
ax2.set_title(f'Best Model: {best_model_name}')
ax2.legend()
ax2.grid(True, alpha=0.3)
# Plot 3: Residuals analysis
ax3 = axes[1, 0]
residuals = test - best_forecast
residuals.plot(ax=ax3, label='Residuals', color='green')
ax3.axhline(y=0, color='red', linestyle='--', alpha=0.5)
ax3.set_title('Forecast Residuals')
ax3.set_ylabel('Residual')
ax3.legend()
ax3.grid(True, alpha=0.3)
# Plot 4: Residuals distribution
ax4 = axes[1, 1]
residuals.hist(ax=ax4, bins=30, edgecolor='black', alpha=0.7)
ax4.axvline(x=0, color='red', linestyle='--', linewidth=2)
ax4.set_title('Residuals Distribution')
ax4.set_xlabel('Residual')
ax4.set_ylabel('Frequency')
# Add normal distribution overlay
from scipy import stats
mu, std = residuals.mean(), residuals.std()
xmin, xmax = ax4.get_xlim()
x = np.linspace(xmin, xmax, 100)
p = stats.norm.pdf(x, mu, std) * len(residuals) * (xmax - xmin) / 30
ax4.plot(x, p, 'k-', linewidth=2, label='Normal')
ax4.legend()
plt.tight_layout()
plt.show()
# Cross-Validation for Time Series
##################################
from sklearn.model_selection import TimeSeriesSplit
def time_series_cv(ts, model_func, n_splits=5):
"""
Perform time series cross-validation
"""
tscv = TimeSeriesSplit(n_splits=n_splits)
scores = []
for train_idx, test_idx in tscv.split(ts):
train_cv = ts.iloc[train_idx]
test_cv = ts.iloc[test_idx]
# Apply model function
forecast_cv = model_func(train_cv, len(test_cv))
# Calculate RMSE
rmse = np.sqrt(mean_squared_error(test_cv, forecast_cv))
scores.append(rmse)
return np.mean(scores), np.std(scores)
# Example: Cross-validate moving average
def ma_model(train, horizon):
return pd.Series([train.rolling(30).mean().iloc[-1]] * horizon)
cv_mean, cv_std = time_series_cv(train, ma_model)
print(f"\nCross-validation RMSE: {cv_mean:.2f} ± {cv_std:.2f}")
pd.date_range() - Create date rangespd.to_datetime() - Convert to datetimedf.resample() - Change frequencydf.rolling() - Rolling window operationsdf.shift() - Lag operationsdf.diff() - Differencingdf.expanding() - Expanding windowdf.ewm() - Exponential weighted functionsdf.asfreq() - Convert to specified frequencydf.interpolate() - Fill missing values'D' - Daily'B' - Business day'W' - Weekly'M' - Month end'MS' - Month start'Q' - Quarter end'Y' - Year end'H' - Hourly'T' or 'min' - Minutely'S' - Secondly