• For any query, contact us at
  • +91-9872993883
  • +91-8283824812
  • info@ris-ai.com

Overview of ARIMA Prediction

ARIMA stands for AutoRegressive Integrated Moving Average, an algorithm used to predict future values from the past values of a time series. ARIMA requires the dataset to be "stationary" in order to produce accurate predictions, meaning that statistical properties such as the mean, variance, and covariance remain unchanged over time.

Time Series Analysis - ARIMA Model

In [84]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Load the EUR/USD daily quotes into a DataFrame.
# The file name must be a quoted string literal; unquoted, `EURUSD=X.csv`
# is parsed as a keyword argument whose value references an undefined `X`.
df = pd.read_csv('EURUSD=X.csv')
df
Out[84]:
Date Open High Low Close Adj Close Volume
0 2020-07-17 1.138650 1.144165 1.137889 1.139212 1.139212 0
1 2020-07-20 1.143955 1.146789 1.140524 1.144296 1.144296 0
2 2020-07-21 1.145764 1.149801 1.142622 1.145869 1.145869 0
3 2020-07-22 1.153509 1.160093 1.150854 1.153403 1.153403 0
4 2020-07-23 1.156671 1.162426 1.154215 1.156872 1.156872 0
... ... ... ... ... ... ... ...
256 2021-07-12 1.187366 1.188213 1.183670 1.187296 1.187296 0
257 2021-07-13 1.186493 1.187790 1.179343 1.186521 1.186521 0
258 2021-07-14 1.177440 1.182801 1.177260 1.177537 1.177537 0
259 2021-07-15 1.183334 1.185115 1.180596 1.183334 1.183334 0
260 2021-07-16 1.181307 1.182300 1.179400 1.181181 1.181181 0

261 rows × 7 columns

In [85]:
# `pandas.datetime` was only a re-export of the standard-library class; it was
# deprecated in pandas 1.0 and removed in later releases, so import it directly.
from datetime import datetime
# Turn the 'Date' strings into timestamps and use them as the row index
# (the column itself is kept), so the chart gets a date x-axis.
df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d')
df.index = df['Date']

# Draw the closing price over time on a 16x8 figure.
plt.figure(figsize=(16, 8))
plt.plot(df['Close'], label='Close Price history')
Out[85]:
[<matplotlib.lines.Line2D at 0x7f886b129828>]

Plot the autocorrelation of the series (a diagnostic used when choosing ARIMA model parameters)

In [86]:
# `pandas.datetime` was only a re-export of the standard-library class; it was
# deprecated in pandas 1.0 and removed in later releases, so import it directly.
from datetime import datetime

from pandas.plotting import autocorrelation_plot

def parser(x):
    """Convert date text in ``YYYY-MM-DD`` form to pandas timestamps.

    Args:
        x: A single date string or a sequence of date strings.

    Returns:
        The result of ``pd.to_datetime`` for ``x`` (a ``Timestamp`` for a
        single string, a ``DatetimeIndex`` for a list of strings).
    """
    # Parse the value handed in by the caller; converting a global frame's
    # column here would ignore the input entirely.
    return pd.to_datetime(x, format='%Y-%m-%d')
           
# Reload the quotes with the first column parsed as dates and used as the index.
# The dates are in ISO form (YYYY-MM-DD), which `parse_dates` handles natively,
# so no custom `date_parser` callback is needed (that keyword is deprecated in
# pandas 2.0).
df = pd.read_csv('EURUSD=X.csv', header=0, parse_dates=[0], index_col=0)

# autocorrelation_plot expects one time series; pass the closing price rather
# than the whole multi-column frame.
autocorrelation_plot(df['Close'])
# Render the figure (a bare plt.plot() with no data draws nothing).
plt.show()
Out[86]:
[]
In [4]:
# Print the first five rows, then chart every column against the index.
print(df.head(5))
df.plot()
plt.show()
                Open      High       Low     Close  Adj Close  Volume
Date                                                                 
2020-07-17  1.138650  1.144165  1.137889  1.139212   1.139212       0
2020-07-20  1.143955  1.146789  1.140524  1.144296   1.144296       0
2020-07-21  1.145764  1.149801  1.142622  1.145869   1.145869       0
2020-07-22  1.153509  1.160093  1.150854  1.153403   1.153403       0
2020-07-23  1.156671  1.162426  1.154215  1.156872   1.156872       0
In [64]:
import matplotlib.dates as mdates
%matplotlib inline
import seaborn as sns


# Tick formatter that renders dates as two-digit month/year ('%m/%y').
my_year_month_fmt = mdates.DateFormatter('%m/%y')

# Load the quotes with the default integer index; 'Date' stays a regular column.
data = pd.read_csv('EURUSD=X.csv')
data.head(n=10)
Out[64]:
Date Open High Low Close Adj Close Volume
0 2020-07-17 1.138650 1.144165 1.137889 1.139212 1.139212 0
1 2020-07-20 1.143955 1.146789 1.140524 1.144296 1.144296 0
2 2020-07-21 1.145764 1.149801 1.142622 1.145869 1.145869 0
3 2020-07-22 1.153509 1.160093 1.150854 1.153403 1.153403 0
4 2020-07-23 1.156671 1.162426 1.154215 1.156872 1.156872 0
5 2020-07-24 1.159501 1.164009 1.158171 1.159608 1.159608 0
6 2020-07-27 1.165257 1.177953 1.165257 1.165257 1.165257 0
7 2020-07-28 1.176651 1.177579 1.170100 1.176928 1.176928 0
8 2020-07-29 1.172058 1.177899 1.171495 1.171880 1.171880 0
9 2020-07-30 1.178689 1.180735 1.173268 1.178287 1.178287 0

Calculation of moving average

In [65]:
# Calculating the short-window (20-row) simple moving average.
# Only numeric columns are averaged: 'Date' was read as text, and pandas 2.0+
# raises on non-numeric columns in rolling aggregations instead of silently
# dropping them.
short_rolling = data.select_dtypes(include='number').rolling(window=20).mean()
short_rolling.head(20)
Out[65]:
Open High Low Close Adj Close Volume
0 NaN NaN NaN NaN NaN NaN
1 NaN NaN NaN NaN NaN NaN
2 NaN NaN NaN NaN NaN NaN
3 NaN NaN NaN NaN NaN NaN
4 NaN NaN NaN NaN NaN NaN
5 NaN NaN NaN NaN NaN NaN
6 NaN NaN NaN NaN NaN NaN
7 NaN NaN NaN NaN NaN NaN
8 NaN NaN NaN NaN NaN NaN
9 NaN NaN NaN NaN NaN NaN
10 NaN NaN NaN NaN NaN NaN
11 NaN NaN NaN NaN NaN NaN
12 NaN NaN NaN NaN NaN NaN
13 NaN NaN NaN NaN NaN NaN
14 NaN NaN NaN NaN NaN NaN
15 NaN NaN NaN NaN NaN NaN
16 NaN NaN NaN NaN NaN NaN
17 NaN NaN NaN NaN NaN NaN
18 NaN NaN NaN NaN NaN NaN
19 1.169727 1.174497 1.166116 1.169748 1.169748 0.0
In [8]:
# Calculating the long-window (100-row) simple moving average.
# Only numeric columns are averaged: 'Date' was read as text, and pandas 2.0+
# raises on non-numeric columns in rolling aggregations instead of silently
# dropping them.
long_rolling = data.select_dtypes(include='number').rolling(window=100).mean()
long_rolling.tail()
Out[8]:
Open High Low Close Adj Close Volume
256 1.200753 1.203508 1.197616 1.200819 1.200819 0.0
257 1.200451 1.203207 1.197272 1.200517 1.200517 0.0
258 1.200073 1.202861 1.196934 1.200141 1.200141 0.0
259 1.199737 1.202470 1.196583 1.199803 1.199803 0.0
260 1.199390 1.202111 1.196285 1.199454 1.199454 0.0
In [66]:
# Line chart of the closing price with grid lines on an 8x5 figure.
data.loc[:, 'Close'].plot(figsize=(8, 5), grid=True)
Out[66]:
<AxesSubplot:>
In [67]:
# 42-row and 252-row simple moving averages of the close, rounded to 2 decimals.
# NOTE(review): rounding to 2 decimals is coarse for quotes around 1.1-1.2 and
# collapses most of the difference between the two averages; confirm intended.
data['42d'] = np.round(data['Close'].rolling(window=42).mean(), 2)
data['252d'] = np.round(data['Close'].rolling(window=252).mean(), 2)
# Call tail(): the bare attribute only echoes the bound method, not the rows.
data.tail()
Out[67]:
In [68]:
# Overlay the close with its 42-row and 252-row moving averages.
data.loc[:, ['Close', '42d', '252d']].plot(figsize=(8, 5), grid=True)
Out[68]:
In [70]:
# Spread between the 42-row and 252-row moving averages.
data['42-252'] = data['42d'] - data['252d']

# Stance is +1 when the spread exceeds X, -1 when it is below -X, else 0.
# NOTE(review): the averages shown for this data set sit around 1.1-1.2, so
# their difference can never reach 50 and every row ends up with stance 0;
# the threshold looks like it was chosen for a different price scale. Confirm.
X = 50
data['Stance'] = np.select(
    [data['42-252'] > X, data['42-252'] < -X],
    [1, -1],
    default=0,
)
data['Stance'].value_counts()
Out[70]:
In [78]:
# Chart the stance value for every row.
data.loc[:, 'Stance'].plot()
Out[78]:
In [88]:
# Log return of the close relative to the previous row.
previous_close = data['Close'].shift(1)
data['Market Returns'] = np.log(data['Close'] / previous_close)

# Strategy return: the previous row's stance applied to this row's return.
data['Strategy'] = data['Stance'].shift(1) * data['Market Returns']

# Compare cumulative log returns of the market and the strategy.
data[['Market Returns', 'Strategy']].cumsum().plot(figsize=(8, 5), grid=True)
Out[88]:
In [80]:
# Difference between the 42-row and 252-row moving averages.
data["diff"] = data["42d"].sub(data["252d"])

# Close and the difference, each in its own subplot.
data.loc[:, ["Close", "diff"]].plot(figsize=(8, 5), subplots=True)
Out[80]:
In [81]:
# Signal is +1 when "diff" exceeds sigdiff, -1 when it is below -sigdiff, else 0.
# NOTE(review): the averages shown for this data set sit around 1.1-1.2, so a
# threshold of 100.0 can never be crossed; confirm the intended price scale.
sigdiff = 100.0
data["Signal"] = np.where(data["diff"] > sigdiff, 1, 0)
# Fall back to the existing signal so the +1 entries set above are preserved
# (falling back to 0 here would wipe them out).
data["Signal"] = np.where(data["diff"] < -sigdiff, -1, data["Signal"])
data[["Close", "diff", "Signal"]].plot(subplots=True, figsize=(8, 8))
Out[81]:
In [82]:
# Log return of the close relative to the previous row.
data["Returns"] = np.log(data["Close"] / data["Close"].shift(1))

# Apply the previous row's signal to the current row's return. The signal for
# a row is computed from that row's close, so using it unshifted would trade on
# information not yet available (look-ahead bias).
data["Strategy"] = data["Signal"].shift(1) * data["Returns"]
# Running total of the strategy's log returns.
data["Earnings"] = data["Strategy"].cumsum()
data[["Close", "Signal", "Earnings"]].plot(subplots=True, figsize=(10, 8))
Out[82]:

Resources You Will Ever Need