Overview of ARIMA Prediction
ARIMA stands for AutoRegressive Integrated Moving Average, an algorithm used to predict future values of a time series from its past values. ARIMA requires the dataset to be "stationary" in order to produce accurate predictions, meaning that parameters such as the mean, variance, and covariance remain unchanged over time.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Load the EUR/USD price history; the file name must be a quoted string.
df = pd.read_csv('EURUSD=X.csv')
df
| | Date | Open | High | Low | Close | Adj Close | Volume |
---|---|---|---|---|---|---|---|
0 | 2020-07-17 | 1.138650 | 1.144165 | 1.137889 | 1.139212 | 1.139212 | 0 |
1 | 2020-07-20 | 1.143955 | 1.146789 | 1.140524 | 1.144296 | 1.144296 | 0 |
2 | 2020-07-21 | 1.145764 | 1.149801 | 1.142622 | 1.145869 | 1.145869 | 0 |
3 | 2020-07-22 | 1.153509 | 1.160093 | 1.150854 | 1.153403 | 1.153403 | 0 |
4 | 2020-07-23 | 1.156671 | 1.162426 | 1.154215 | 1.156872 | 1.156872 | 0 |
... | ... | ... | ... | ... | ... | ... | ... |
256 | 2021-07-12 | 1.187366 | 1.188213 | 1.183670 | 1.187296 | 1.187296 | 0 |
257 | 2021-07-13 | 1.186493 | 1.187790 | 1.179343 | 1.186521 | 1.186521 | 0 |
258 | 2021-07-14 | 1.177440 | 1.182801 | 1.177260 | 1.177537 | 1.177537 | 0 |
259 | 2021-07-15 | 1.183334 | 1.185115 | 1.180596 | 1.183334 | 1.183334 | 0 |
260 | 2021-07-16 | 1.181307 | 1.182300 | 1.179400 | 1.181181 | 1.181181 | 0 |
261 rows × 7 columns
# pandas.datetime has been deprecated since pandas 1.0 and is not importable
# from current releases; the standard-library class is the replacement.
from datetime import datetime

# Convert the text dates to real timestamps and use them as the index so the
# x-axis of the plot is a time axis.
df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d')
df.index = df['Date']

plt.figure(figsize=(16, 8))
plt.plot(df['Close'], label='Close Price history')
# Without a legend the label given above is never displayed.
plt.legend()
[<matplotlib.lines.Line2D at 0x7f886b129828>]
# pandas.datetime has been deprecated since pandas 1.0; use the stdlib class.
from datetime import datetime
from pandas.plotting import autocorrelation_plot


def parser(x):
    """Convert the raw date text *x* handed over by ``read_csv`` to datetimes.

    The values are expected in ``YYYY-MM-DD`` form.
    """
    # Parse the argument itself; the previous version ignored it and
    # re-parsed the ``Date`` column of the global ``df`` instead.
    return pd.to_datetime(x, format='%Y-%m-%d')


# NOTE: ``date_parser`` is deprecated in pandas 2.0 in favour of
# ``date_format``; it is kept here so older pandas versions still work.
df = pd.read_csv('EURUSD=X.csv', header=0, parse_dates=[0], index_col=0,
                 date_parser=parser)

# Autocorrelation is defined for a single series, so plot the closing price
# rather than the whole multi-column frame.
autocorrelation_plot(df['Close'])
plt.show()
[]
# Print the first rows of the frame, then chart every column against the
# frame's index.
preview = df.head()
print(preview)

df.plot()
plt.show()
Open High Low Close Adj Close Volume Date 2020-07-17 1.138650 1.144165 1.137889 1.139212 1.139212 0 2020-07-20 1.143955 1.146789 1.140524 1.144296 1.144296 0 2020-07-21 1.145764 1.149801 1.142622 1.145869 1.145869 0 2020-07-22 1.153509 1.160093 1.150854 1.153403 1.153403 0 2020-07-23 1.156671 1.162426 1.154215 1.156872 1.156872 0
import matplotlib.dates as mdates %matplotlib inline import seaborn as sns my_year_month_fmt = mdates.DateFormatter('%m/%y') data = pd.read_csv('EURUSD=X.csv') data.head(10)
| | Date | Open | High | Low | Close | Adj Close | Volume |
---|---|---|---|---|---|---|---|
0 | 2020-07-17 | 1.138650 | 1.144165 | 1.137889 | 1.139212 | 1.139212 | 0 |
1 | 2020-07-20 | 1.143955 | 1.146789 | 1.140524 | 1.144296 | 1.144296 | 0 |
2 | 2020-07-21 | 1.145764 | 1.149801 | 1.142622 | 1.145869 | 1.145869 | 0 |
3 | 2020-07-22 | 1.153509 | 1.160093 | 1.150854 | 1.153403 | 1.153403 | 0 |
4 | 2020-07-23 | 1.156671 | 1.162426 | 1.154215 | 1.156872 | 1.156872 | 0 |
5 | 2020-07-24 | 1.159501 | 1.164009 | 1.158171 | 1.159608 | 1.159608 | 0 |
6 | 2020-07-27 | 1.165257 | 1.177953 | 1.165257 | 1.165257 | 1.165257 | 0 |
7 | 2020-07-28 | 1.176651 | 1.177579 | 1.170100 | 1.176928 | 1.176928 | 0 |
8 | 2020-07-29 | 1.172058 | 1.177899 | 1.171495 | 1.171880 | 1.171880 | 0 |
9 | 2020-07-30 | 1.178689 | 1.180735 | 1.173268 | 1.178287 | 1.178287 | 0 |
# Calculating the short-window simple moving average
# Only numeric columns can be averaged. The text 'Date' column is excluded
# explicitly because recent pandas versions raise on non-numeric columns
# instead of silently dropping them.
short_rolling = data.select_dtypes(include='number').rolling(window=20).mean()
short_rolling.head(20)
| | Open | High | Low | Close | Adj Close | Volume |
---|---|---|---|---|---|---|
0 | NaN | NaN | NaN | NaN | NaN | NaN |
1 | NaN | NaN | NaN | NaN | NaN | NaN |
2 | NaN | NaN | NaN | NaN | NaN | NaN |
3 | NaN | NaN | NaN | NaN | NaN | NaN |
4 | NaN | NaN | NaN | NaN | NaN | NaN |
5 | NaN | NaN | NaN | NaN | NaN | NaN |
6 | NaN | NaN | NaN | NaN | NaN | NaN |
7 | NaN | NaN | NaN | NaN | NaN | NaN |
8 | NaN | NaN | NaN | NaN | NaN | NaN |
9 | NaN | NaN | NaN | NaN | NaN | NaN |
10 | NaN | NaN | NaN | NaN | NaN | NaN |
11 | NaN | NaN | NaN | NaN | NaN | NaN |
12 | NaN | NaN | NaN | NaN | NaN | NaN |
13 | NaN | NaN | NaN | NaN | NaN | NaN |
14 | NaN | NaN | NaN | NaN | NaN | NaN |
15 | NaN | NaN | NaN | NaN | NaN | NaN |
16 | NaN | NaN | NaN | NaN | NaN | NaN |
17 | NaN | NaN | NaN | NaN | NaN | NaN |
18 | NaN | NaN | NaN | NaN | NaN | NaN |
19 | 1.169727 | 1.174497 | 1.166116 | 1.169748 | 1.169748 | 0.0 |
# Calculating the long-window simple moving average
# Only numeric columns can be averaged. The text 'Date' column is excluded
# explicitly because recent pandas versions raise on non-numeric columns
# instead of silently dropping them.
long_rolling = data.select_dtypes(include='number').rolling(window=100).mean()
long_rolling.tail()
| | Open | High | Low | Close | Adj Close | Volume |
---|---|---|---|---|---|---|
256 | 1.200753 | 1.203508 | 1.197616 | 1.200819 | 1.200819 | 0.0 |
257 | 1.200451 | 1.203207 | 1.197272 | 1.200517 | 1.200517 | 0.0 |
258 | 1.200073 | 1.202861 | 1.196934 | 1.200141 | 1.200141 | 0.0 |
259 | 1.199737 | 1.202470 | 1.196583 | 1.199803 | 1.199803 | 0.0 |
260 | 1.199390 | 1.202111 | 1.196285 | 1.199454 | 1.199454 | 0.0 |
# Line chart of the closing price with grid lines enabled.
close_series = data['Close']
close_series.plot(figsize=(8, 5), grid=True)
<AxesSubplot:>
# 42-row and 252-row simple moving averages of the close.
# The averages are kept at 4 decimals (one pip): rounding an exchange rate
# near 1.2 to 2 decimals collapses the averages to almost constant values.
data['42d'] = np.round(data['Close'].rolling(window=42).mean(), 4)
data['252d'] = np.round(data['Close'].rolling(window=252).mean(), 4)

# tail must be called; the bare attribute only displays the bound method.
data.tail()
<bound method NDFrame.tail of Date Open High Low Close Adj Close Volume \ 0 2020-07-17 1.138650 1.144165 1.137889 1.139212 1.139212 0 1 2020-07-20 1.143955 1.146789 1.140524 1.144296 1.144296 0 2 2020-07-21 1.145764 1.149801 1.142622 1.145869 1.145869 0 3 2020-07-22 1.153509 1.160093 1.150854 1.153403 1.153403 0 4 2020-07-23 1.156671 1.162426 1.154215 1.156872 1.156872 0 .. ... ... ... ... ... ... ... 256 2021-07-12 1.187366 1.188213 1.183670 1.187296 1.187296 0 257 2021-07-13 1.186493 1.187790 1.179343 1.186521 1.186521 0 258 2021-07-14 1.177440 1.182801 1.177260 1.177537 1.177537 0 259 2021-07-15 1.183334 1.185115 1.180596 1.183334 1.183334 0 260 2021-07-16 1.181307 1.182300 1.179400 1.181181 1.181181 0 42d 252d 0 NaN NaN 1 NaN NaN 2 NaN NaN 3 NaN NaN 4 NaN NaN .. ... ... 256 1.21 1.2 257 1.20 1.2 258 1.20 1.2 259 1.20 1.2 260 1.20 1.2 [261 rows x 9 columns]>
# Overlay the close with both moving-average columns on one chart.
columns_to_plot = ['Close', '42d', '252d']
data[columns_to_plot].plot(figsize=(8, 5), grid=True)
<AxesSubplot:>
# Spread between the fast and the slow moving average.
data['42-252'] = data['42d'] - data['252d']

# Threshold expressed in price units: 50 pips (1 pip = 0.0001 for EUR/USD).
# A raw value of 50 can never be reached by a rate near 1.2, which left every
# stance at 0.
X = 0.005

# +1 when the fast average is above the slow one by more than X, -1 when it
# is below by more than X, otherwise 0.
data['Stance'] = np.where(data['42-252'] > X, 1, 0)
data['Stance'] = np.where(data['42-252'] < -X, -1, data['Stance'])
data['Stance'].value_counts()
0 261 Name: Stance, dtype: int64
# Chart the stance column against the row index.
stance_series = data['Stance']
stance_series.plot()
<AxesSubplot:>
# Row-over-row log return of the close.
previous_close = data['Close'].shift(1)
data['Market Returns'] = np.log(data['Close'] / previous_close)

# Strategy return: the previous row's stance applied to the current return.
lagged_stance = data['Stance'].shift(1)
data['Strategy'] = data['Market Returns'] * lagged_stance

# Cumulative log returns of the market and of the strategy on one chart.
cumulative_returns = data[['Market Returns', 'Strategy']].cumsum()
cumulative_returns.plot(figsize=(8, 5), grid=True)
<AxesSubplot:>
# Difference between the 42-row and 252-row averages, charted in its own
# panel below the close.
average_spread = data["42d"].sub(data["252d"])
data["diff"] = average_spread
data[["Close", "diff"]].plot(figsize=(8, 5), subplots=True)
array([<AxesSubplot:>, <AxesSubplot:>], dtype=object)
# Threshold in price units: 100 pips (1 pip = 0.0001 for EUR/USD). A raw
# value of 100.0 can never be reached by a rate near 1.2.
sigdiff = 0.01

data["Signal"] = np.where(data["diff"] > sigdiff, 1, 0)
# Keep the long signals assigned above and only overwrite the short rows;
# using 0 as the fallback here erased every +1.
data["Signal"] = np.where(data["diff"] < -sigdiff, -1, data["Signal"])
data[["Close", "diff", "Signal"]].plot(subplots=True, figsize=(8, 8))
array([<AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>], dtype=object)
# Row-over-row log return of the close.
data["Returns"] = np.log(data["Close"] / data["Close"].shift(1))

# Trade on the previous row's signal. The signal is built from averages that
# include the current close, so multiplying it unshifted would earn a return
# using information that was not yet available (look-ahead bias).
data["Strategy"] = data["Signal"].shift(1) * data["Returns"]

# Running total of the strategy's log returns.
data["Earnings"] = data["Strategy"].cumsum()
data[["Close", "Signal", "Earnings"]].plot(subplots=True, figsize=(10, 8))
array([<AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>], dtype=object)
We used the ARIMA model to analyze and forecast the data. We calculated moving averages and logarithmic returns, plotted the autocorrelation function, and generated trading signals.