arma

    技术2022-07-10  107

    # Time-series forecasting with an ARMA model.
    import pandas as pd
    import matplotlib.pyplot as plt
    import statsmodels.api as sm
    # NOTE(review): statsmodels.tsa.arima_model.ARMA was deprecated in
    # statsmodels 0.12 and retired in later releases; on a recent statsmodels
    # this script needs statsmodels.tsa.arima.model.ARIMA with order (7, 0, 0)
    # instead (the fitted numbers may differ slightly) -- confirm the target
    # statsmodels version before switching.
    from statsmodels.tsa.arima_model import ARMA
    from statsmodels.graphics.api import qqplot

    # Build the sample data: 90 yearly observations.
    data = [
        3821, 4236, 3758, 6783, 4664, 2589, 2538, 3542, 4626, 5886,
        6233, 4199, 3561, 2335, 5636, 3524, 4327, 6064, 3912, 1356,
        4305, 4379, 4592, 4233, 4281, 1613, 1233, 4514, 3431, 2159,
        2322, 4239, 4733, 2268, 5397, 5821, 6115, 6631, 6474, 4134,
        2728, 5753, 7130, 7860, 6991, 7499, 5301, 2808, 6755, 6658,
        6944, 6372, 8380, 7366, 6352, 8333, 8281, 11548, 10823, 13642,
        9973, 6723, 13416, 12205, 13942, 9590, 11693, 9276, 6519, 6863,
        8237, 10122, 8646, 9749, 5346, 4836, 9806, 7502, 9387, 11078,
        9832, 6886, 4285, 8351, 9725, 11844, 12387, 10666, 7072, 6429,
    ]
    data = pd.Series(data)
    data_index = sm.tsa.datetools.dates_from_range('1901', '1990')
    # print(data_index)

    # Attach the yearly date index and plot the raw series.
    data.index = pd.Index(data_index)
    data.plot(figsize=(12, 8))
    plt.show()

    # Fit an ARMA(7, 0) model and report its AIC.
    arma = ARMA(data, (7, 0)).fit()
    print('AIC: %0.4lf' % arma.aic)

    # Predict from the last observed year through 2000.
    predict_y = arma.predict('1990', '2000')

    # Plot the observed series together with the prediction.
    fig, ax = plt.subplots(figsize=(12, 8))
    # .loc replaces the .ix indexer, which was removed in pandas 1.0.
    ax = data.loc['1901':].plot(ax=ax)
    predict_y.plot(ax=ax)
    plt.show()

    stock_arima

    # -*- coding: utf-8 -*-
    # Forecast the Shanghai index trend with a time-series ARIMA-family model.
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    from statsmodels.tsa.arima_model import ARIMA
    import statsmodels.api as sm
    import warnings
    from itertools import product
    from datetime import datetime, timedelta
    import calendar

    warnings.filterwarnings('ignore')
    plt.rcParams['font.sans-serif'] = ['SimHei']  # font used so the Chinese plot labels render

    # Load the data and keep only the two columns used below.
    df = pd.read_csv('./shanghai_index_1990_12_19_to_2020_03_12.csv')
    df = df[['Timestamp', 'Price']]

    # Use the timestamp as the DataFrame index.
    df.Timestamp = pd.to_datetime(df.Timestamp)
    df.index = df.Timestamp

    # Quick look at the data.
    print(df.head())

    # Aggregate (mean) by month, quarter and year.
    df_month = df.resample('M').mean()
    df_Q = df.resample('Q-DEC').mean()
    df_year = df.resample('A-DEC').mean()
    print(df_month)

    # Parameter grid: p and q in 0..4, d fixed to 1.
    ps = range(0, 5)
    qs = range(0, 5)
    ds = range(1, 2)
    parameters = product(ps, ds, qs)
    parameters_list = list(parameters)

    # Search for the parameters with the smallest AIC.
    results = []
    best_aic = float("inf")
    # Initialised so that "no model could be fitted" is reported explicitly
    # instead of surfacing later as a NameError.
    best_model = None
    best_param = None
    for param in parameters_list:
        try:
            # Alternative: ARIMA(df_month.Price, order=param).fit()
            # SARIMAX can also model a seasonal component, e.g. by passing
            # seasonal_order=(4, 1, 2, 12); it is left out here.
            model = sm.tsa.statespace.SARIMAX(
                df_month.Price,
                order=(param[0], param[1], param[2]),
                enforce_stationarity=False,
                enforce_invertibility=False,
            ).fit()
        except ValueError:
            print('参数错误:', param)
            continue
        aic = model.aic
        if aic < best_aic:
            best_model = model
            best_aic = aic
            best_param = param
        results.append([param, model.aic])

    if best_model is None:
        raise RuntimeError('no SARIMAX model could be fitted for any parameter combination')

    # Print the best model.
    print('最优模型: ', best_model.summary())

    # Build date_list: the future month-end dates to forecast.
    df_month2 = df_month[['Price']]
    future_month = 3
    last_month = pd.to_datetime(df_month2.index[-1])
    date_list = []
    for _ in range(future_month):
        # Work out how many days the following month has, then step by that
        # many days (the monthly resample index holds month-end dates, so this
        # lands on the next month-end).
        year = last_month.year
        month = last_month.month
        if month == 12:
            month = 1
            year = year + 1
        else:
            month = month + 1
        next_month_days = calendar.monthrange(year, month)[1]
        last_month = last_month + timedelta(days=next_month_days)
        date_list.append(last_month)
    print('date_list=', date_list)

    # Append empty rows for the months to forecast. The placeholder frame
    # uses df_month2's columns so that only 'Price' is carried along.
    future = pd.DataFrame(index=date_list, columns=df_month2.columns)
    df_month2 = pd.concat([df_month2, future])

    # get_prediction returns a results object that also carries intervals;
    # predicted_mean is the point forecast. `end` is an inclusive zero-based
    # position, so the last row of df_month2 is len(df_month2) - 1.
    df_month2['forecast'] = best_model.get_prediction(
        start=0, end=len(df_month2) - 1
    ).predicted_mean

    # Plot the actual index against the forecast.
    plt.figure(figsize=(30, 7))
    df_month2.Price.plot(label='实际指数')
    df_month2.forecast.plot(color='r', ls='--', label='预测指数')
    plt.legend()
    plt.title('沪市指数(月)')
    plt.xlabel('时间')
    plt.ylabel('指数')
    plt.show()

    stock_prophet

    # Forecast the Shanghai index 365 days past the end of the history with
    # Prophet. (The original header described a page-view dataset starting
    # 2007-12-10, which does not match the CSV loaded below.)
    import pandas as pd
    from fbprophet import Prophet
    import matplotlib.pyplot as plt

    # Enable inline plots when running under IPython/Jupyter. A bare
    # `%matplotlib inline` is notebook-only syntax, so it is expressed as a
    # guarded call that is simply skipped under a plain Python interpreter.
    try:
        get_ipython().run_line_magic('matplotlib', 'inline')
    except NameError:
        pass

    # Load the dataset.
    df = pd.read_csv('./shanghai_index_1990_12_19_to_2020_03_12.csv')
    # Rename the columns to the names used for fitting: Timestamp => ds, Price => y.
    df.rename(columns={'Timestamp': 'ds', 'Price': 'y'}, inplace=True)
    print(df.head())
    # print(df.tail())

    # Fit the model.
    model = Prophet()
    model.fit(df)

    # Build the frame of dates to predict: periods=365 extends 365 days
    # beyond the historical dates.
    future = model.make_future_dataframe(periods=365)
    # print(future.tail())

    # Predict over the history plus the future dates.
    forecast = model.predict(future)
    # print(forecast.columns)
    print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail())

    # Show the forecast.
    model.plot(forecast)
    plt.show()

    # Component plot of the forecast (per the original note: trend, weekly
    # and yearly effects).
    model.plot_components(forecast)
    plt.show()  # needed for the component figure to appear outside a notebook
    print(forecast.columns)

    stock_tsa

    # Decompose the Shanghai index with statsmodels tsa: trend, seasonal, residual.
    import statsmodels.api as sm
    import matplotlib.pyplot as plt
    import pandas as pd

    # Load the data.
    data = pd.read_csv('shanghai_index_1990_12_19_to_2020_03_12.csv', usecols=['Timestamp', 'Price'])
    print(data.head())

    # Convert to pandas datetimes and index by timestamp.
    data.Timestamp = pd.to_datetime(data.Timestamp)
    data = data.set_index('Timestamp')
    # Coerce unparseable prices to NaN so the interpolation below can fill
    # them; errors='ignore' would have left them in place as strings.
    data['Price'] = pd.to_numeric(data['Price'], errors='coerce')

    # Linearly interpolate missing values (the original note gives 1990-12-25
    # as an example of a date whose value is missing).
    print(data.Price.shape)
    # Assign the result back instead of calling interpolate(inplace=True) on
    # the attribute-accessed column, which is not guaranteed to update `data`.
    data['Price'] = data['Price'].interpolate()
    print(data.head())

    # seasonal_decompose returns three parts: trend, seasonal and resid, with
    # price = trend + seasonal + resid. The period is meant to be the number
    # of observations per yearly cycle.
    # NOTE(review): the original comment mentions 365 but the code uses 288 --
    # confirm which value is intended.
    try:
        # Newer statsmodels releases name this argument `period`.
        result = sm.tsa.seasonal_decompose(data.Price, period=288)
    except TypeError:
        # Older statsmodels releases only accept the former name, `freq`.
        result = sm.tsa.seasonal_decompose(data.Price, freq=288)
    # print(result)
    result.plot()
    plt.show()

    stock_prophet.py

    #!/usr/bin/env python
    # coding: utf-8

    # In[6]:

    # Forecast the Shanghai index 365 days past the end of the history with
    # Prophet. (The original header described a page-view dataset starting
    # 2007-12-10, which does not match the CSV loaded below.)
    import pandas as pd
    from fbprophet import Prophet
    import matplotlib.pyplot as plt

    # get_ipython() only exists under IPython/Jupyter; guard the call so this
    # exported notebook also runs as a plain Python script.
    try:
        get_ipython().run_line_magic('matplotlib', 'inline')
    except NameError:
        pass

    # Load the dataset.
    df = pd.read_csv('./shanghai_index_1990_12_19_to_2020_03_12.csv')
    # Rename the columns to the names used for fitting: Timestamp => ds, Price => y.
    df.rename(columns={'Timestamp': 'ds', 'Price': 'y'}, inplace=True)
    print(df.head())

    # In[7]:

    # print(df.tail())
    # Fit the model.
    model = Prophet()
    model.fit(df)

    # Build the frame of dates to predict: periods=365 extends 365 days
    # beyond the historical dates.
    future = model.make_future_dataframe(periods=365)
    # print(future.tail())

    # Predict over the history plus the future dates.
    forecast = model.predict(future)
    # print(forecast.columns)
    print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail())

    # Show the forecast.
    model.plot(forecast)
    plt.show()

    # In[8]:

    # Component plot of the forecast (per the original note: trend, weekly
    # and yearly effects).
    model.plot_components(forecast)
    plt.show()  # needed for the component figure to appear outside a notebook
    print(forecast.columns)
    Processed: 0.026, SQL: 9