Name: 时间序列分析Skill
Rating: 5 (1 reviews)
Author: aj

时间序列分析

概览

时间序列分析检查随时间收集的数据点以识别模式、趋势和季节性，用于预测和理解时间动态。

何时使用

基于历史趋势预测未来值
在数据中检测季节性和周期性模式
分析随时间变化的销售、股票价格或网站流量趋势
理解自相关和时间依赖性
带置信区间的时间序列预测
将数据分解为趋势、季节性和残差分量

核心组件

趋势：长期方向性运动
季节性：在固定间隔重复的模式
周期性：长期振荡（非固定周期）
平稳性：随时间恒定的均值、方差
自相关：与过去值的相关性

关键技术

分解：分离趋势、季节性、残差分量
差分：使数据平稳
ARIMA：自回归积分滑动平均模型
指数平滑：对过去观测值的加权平均，权重随时间间隔呈指数衰减
SARIMA：季节性ARIMA模型

使用Python实现

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller, acf, pacf
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.holtwinters import ExponentialSmoothing

# Build a synthetic daily series for the demo: a single sine cycle spread over
# 365 days (amplitude 20 around a level of 100) plus Gaussian noise (sd 5).
dates = pd.date_range('2020-01-01', periods=365, freq='D')
day_index = np.arange(365)
phase = day_index * 2 * np.pi / 365
noise = np.random.normal(0, 5, 365)
values = 100 + np.sin(phase) * 20 + noise
ts = pd.Series(values, index=dates)

# Overview figure: the raw series plus the three additive decomposition
# components, one per panel of a 2x2 grid.
fig, axes = plt.subplots(2, 2, figsize=(14, 8))

raw_panel = axes[0, 0]
raw_panel.plot(ts)
raw_panel.set_title('原始时间序列')
raw_panel.set_ylabel('值')

# Additive decomposition with a 30-observation seasonal period.
decomposition = seasonal_decompose(ts, model='additive', period=30)

component_panels = (
    (axes[0, 1], decomposition.trend, '趋势分量'),
    (axes[1, 0], decomposition.seasonal, '季节分量'),
    (axes[1, 1], decomposition.resid, '残差分量'),
)
for panel, component, panel_title in component_panels:
    panel.plot(component)
    panel.set_title(panel_title)

plt.tight_layout()
plt.show()

# Stationarity check with the Augmented Dickey-Fuller (ADF) test.
# The ADF null hypothesis is that the series has a unit root (non-stationary),
# so a p-value at or below 0.05 is treated here as evidence of stationarity.
result = adfuller(ts)
print(f"ADF检验统计量: {result[0]:.6f}")
print(f"P值: {result[1]:.6f}")
print(f"临界值: {result[4]}")

if result[1] <= 0.05:
    print("时间序列是平稳的")
else:
    print("时间序列是非平稳的 - 需要差分")

# First-order differencing to remove trend; the first value becomes NaN and is
# dropped before re-running the test.
ts_diff = ts.diff().dropna()
result_diff = adfuller(ts_diff)
# The leading blank line is written as an escape sequence: a raw line break
# inside a single-quoted f-string is a SyntaxError.
print(f"\n差分后 - ADF p值: {result_diff[1]:.6f}")

# Autocorrelation and partial autocorrelation of the differenced series,
# drawn side by side for the first 40 lags.
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
acf_panel, pacf_panel = axes

correlograms = (
    (plot_acf, acf_panel, 'ACF'),
    (plot_pacf, pacf_panel, 'PACF'),
)
for draw, panel, label in correlograms:
    draw(ts_diff, lags=40, ax=panel)
    panel.set_title(label)

plt.tight_layout()
plt.show()

# Fit an ARIMA model with order (1, 1, 1) on the raw series and print its
# summary table.
forecast_steps = 30
arima_model = ARIMA(ts, order=(1, 1, 1))
arima_result = arima_model.fit()
print(arima_result.summary())

# Forecast `forecast_steps` periods ahead; keep both the point forecast and
# the confidence-interval bounds for plotting.
forecast = arima_result.get_forecast(steps=forecast_steps)
forecast_mean = forecast.predicted_mean
forecast_df = forecast.conf_int()

# Plot the last 90 observations together with the forecast and its
# confidence band (first column = lower bound, second column = upper bound).
recent_history = ts[-90:]
forecast_index = forecast_df.index
lower_bound = forecast_df.iloc[:, 0]
upper_bound = forecast_df.iloc[:, 1]

fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(recent_history.index, recent_history, label='历史')
ax.plot(forecast_index, forecast_mean, label='预测', color='red')
ax.fill_between(forecast_index, lower_bound, upper_bound, color='red', alpha=0.2)
ax.set_title('ARIMA预测与置信区间')
ax.legend()
ax.grid(True, alpha=0.3)
plt.show()

# Holt-Winters exponential smoothing with additive trend and additive
# seasonality (30-observation season); initial states are estimated.
exp_smooth = ExponentialSmoothing(
    ts, seasonal_periods=30, trend='add', seasonal='add', initialization_method='estimated'
)
exp_result = exp_smooth.fit()

# Model diagnostics.
# HoltWintersResults has no plot_diagnostics() method (that helper exists on
# state-space results such as the ARIMA fit), so the diagnostic panels are
# built by hand from the fitted values and residuals.
exp_residuals = exp_result.resid
fig, axes = plt.subplots(2, 2, figsize=(12, 8))

axes[0, 0].plot(ts, label='原始', alpha=0.7)
axes[0, 0].plot(exp_result.fittedvalues, label='拟合')
axes[0, 0].set_title('指数平滑拟合')
axes[0, 0].legend()

axes[0, 1].plot(exp_residuals)
axes[0, 1].set_title('残差')

axes[1, 0].hist(exp_residuals, bins=30)
axes[1, 0].set_title('残差分布')

plot_acf(exp_residuals, lags=40, ax=axes[1, 1])
axes[1, 1].set_title('残差ACF')

plt.tight_layout()
plt.show()

# Moving-average smoothing: overlay rolling means of several window lengths
# on top of the raw series.
window_sizes = [7, 30, 90]
fig, ax = plt.subplots(figsize=(12, 5))

ax.plot(ts.index, ts.values, label='原始', alpha=0.7)

rolling_means = {size: ts.rolling(window=size).mean() for size in window_sizes}
for window, ma in rolling_means.items():
    ax.plot(ma.index, ma.values, label=f'MA({window})')

ax.set_title('移动平均')
ax.legend()
ax.grid(True, alpha=0.3)
plt.show()

# Subseries plots: one panel per calendar month, for months 1 through 4 only
# (the 2x2 grid has four slots).
fig, axes = plt.subplots(2, 2, figsize=(12, 8))
for i, month in enumerate(range(1, 5)):
    row, col = divmod(i, 2)
    panel = axes[row, col]
    in_month = ts.index.month == month
    month_data = ts[in_month]
    panel.plot(month_data.values)
    panel.set_title(f'月份 {month} 模式')

plt.tight_layout()
plt.show()

# Forecast accuracy metrics
def calculate_forecast_metrics(actual, predicted):
    """Compute MAE, RMSE and MAPE between observed and predicted values.

    Inputs are compared position by position. Both are converted to plain
    NumPy arrays first, so two pandas Series with different indexes (for
    example history vs. a future forecast) are not silently aligned by label,
    which would turn every error into NaN.

    Args:
        actual: Observed values (sequence, array or Series).
        predicted: Forecast values, broadcastable against ``actual``.

    Returns:
        dict with keys ``'MAE'``, ``'RMSE'`` and ``'MAPE'`` (percent). MAPE is
        averaged only over positions where ``actual`` is non-zero, and is NaN
        when no such position exists.
    """
    actual_arr = np.asarray(actual, dtype=float)
    predicted_arr = np.asarray(predicted, dtype=float)
    errors = actual_arr - predicted_arr

    mae = np.mean(np.abs(errors))
    rmse = np.sqrt(np.mean(errors ** 2))

    # Percentage error is undefined where the observed value is zero; skip
    # those positions instead of letting inf/NaN poison the average.
    denominators = np.broadcast_to(actual_arr, errors.shape)
    nonzero = denominators != 0
    if np.any(nonzero):
        mape = np.mean(np.abs(errors[nonzero] / denominators[nonzero])) * 100
    else:
        mape = np.nan

    return {'MAE': mae, 'RMSE': rmse, 'MAPE': mape}

# Out-of-sample evaluation: hold out the last 30 observations, refit the same
# ARIMA(1, 1, 1) specification on the remainder, and score its forecast
# against the held-out values. Comparing the future forecast with the tail of
# the training data would measure nothing, and the two Series do not share an
# index, so plain arrays are passed to compare position by position.
holdout_steps = 30
train_ts = ts.iloc[:-holdout_steps]
holdout_ts = ts.iloc[-holdout_steps:]
holdout_forecast = ARIMA(train_ts, order=(1, 1, 1)).fit().forecast(steps=holdout_steps)

metrics = calculate_forecast_metrics(
    np.asarray(holdout_ts), np.asarray(holdout_forecast)
)
# Line breaks are written as escapes; raw newlines inside the literal are a
# SyntaxError.
print(f"\n预测指标:\n{metrics}")

# 额外分析技术

# Step 10: seasonal subseries plots, one panel per calendar quarter.
# Grouping by `month % 4` would mix unrelated months (e.g. January, May and
# September), so the quarter of each timestamp is used instead.
fig, axes = plt.subplots(2, 2, figsize=(12, 8))
for i, season in enumerate([1, 2, 3, 4]):
    seasonal_ts = ts[ts.index.quarter == season]
    axes[i // 2, i % 2].plot(seasonal_ts.values)
    axes[i // 2, i % 2].set_title(f'季节 {season}')
plt.tight_layout()
plt.show()

# 第11步：Granger因果关系（多系列）
from statsmodels.tsa.stattools import grangercausalitytests

# Build a second series to test against: `ts` lagged by one step. The shift
# leaves a NaN in the first position, which is back-filled. Series.bfill()
# replaces the deprecated fillna(method='bfill') spelling.
ts2 = ts.shift(1).bfill()

# Best-effort demo: any failure in the test is reported, not raised.
# grangercausalitytests checks whether the second column helps predict the
# first, i.e. whether ts2 Granger-causes ts.
try:
    # The leading blank line is an escape sequence; a raw newline inside the
    # literal is a SyntaxError.
    print("\nGranger因果关系测试:")
    print("测试ts2是否Granger导致ts:")
    gc_result = grangercausalitytests(np.column_stack([ts.values, ts2.values]), maxlag=3)
except Exception as e:
    print(f"Granger因果关系未执行: {str(e)[:50]}")

# 第12步：自相关和偏自相关分析
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# Numeric ACF and PACF values (lags 0..20) of the series with NaNs removed.
ts_without_nan = ts.dropna()
acf_values = acf(ts_without_nan, nlags=20)
pacf_values = pacf(ts_without_nan, nlags=20)

# Step 13: seasonal strength
def seasonal_strength(series, seasonal_period=30):
    """Return the share of (seasonal + residual) variance that is seasonal.

    The series is decomposed additively with the given period; the result is
    ``1 - var(resid) / (var(resid) + var(seasonal))``, or 0 when that
    denominator is not positive.
    """
    parts = seasonal_decompose(series, model='additive', period=seasonal_period)
    # The residual component has NaNs at the edges; drop them before taking
    # the variance.
    resid_variance = np.var(parts.resid.dropna())
    seasonal_variance = np.var(parts.seasonal)
    combined = resid_variance + seasonal_variance
    if combined > 0:
        return 1 - (resid_variance / combined)
    return 0

# Seasonal strength of the demo series, using the default 30-observation period.
ss = seasonal_strength(ts)
# The leading blank line is an escape sequence; a raw newline inside the
# literal is a SyntaxError.
print(f"\n季节强度: {ss:.3f}")

# Step 14: forecast uncertainty. Overlay forecasts for several horizons on
# the last 60 observations.
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(ts.index[-60:], ts.values[-60:], label='历史', linewidth=2)

# Multi-step forecasts. Each horizon is best-effort: a failure is reported
# and the remaining horizons are still drawn, instead of being silently
# swallowed by a bare `except`.
for steps_ahead in [10, 20, 30]:
    try:
        fc = arima_result.get_forecast(steps=steps_ahead)
        fc_mean = fc.predicted_mean
        # Daily dates following the last observation (the first element of
        # the range is the last observation itself, so it is dropped).
        future_dates = pd.date_range(ts.index[-1], periods=steps_ahead + 1)[1:]
        ax.plot(future_dates, fc_mean.values, marker='o', label=f'预测 (+{steps_ahead})')
    except Exception as exc:
        print(f"预测 (+{steps_ahead}) 未执行: {str(exc)[:50]}")

ax.set_title('多步预测')
ax.set_xlabel('日期')
ax.set_ylabel('值')
ax.legend()
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# Step 15: final summary of the analysis.
# The leading blank line is an escape sequence; a raw newline inside the
# literal is a SyntaxError.
print("\n时间序列分析完成！")
print(f"原始系列长度: {len(ts)}")

# Trend strength = max(0, 1 - Var(R) / Var(T + R)), where T is the trend and R
# the residual of the decomposition. Dividing by the variance of the whole
# series would also count the seasonal component as "trend".
# Both components have NaNs at the edges, hence the dropna() calls.
remainder = decomposition.resid.dropna()
trend_plus_remainder = (decomposition.trend + decomposition.resid).dropna()
trend_strength = max(0.0, 1 - np.var(remainder) / np.var(trend_plus_remainder))
print(f"趋势强度: {trend_strength:.3f}")
print(f"季节强度: {ss:.3f}")

平稳性

平稳性：均值、方差、自相关随时间恒定
非平稳性：存在趋势或季节性模式
解决方案：差分、对数转换或去趋势

模型选择

ARIMA：适用于单变量预测
SARIMA：包含季节性分量
指数平滑：更简单，适用于趋势
Prophet：处理节假日和变化点

评估指标

MAE：平均绝对误差
RMSE：均方根误差
MAPE：平均绝对百分比误差

交付物

分解分析图表
平稳性测试结果
ACF/PACF图
拟合模型与诊断
预测与置信区间
准确性指标比较