A Simple Backtest





Kerry Back

Linear regression from scikit-learn

import pandas as pd
import pymssql  # noqa: F401 -- not called directly; it is the driver named in the "mssql+pymssql" URL
from sklearn.linear_model import LinearRegression  # the scikit-learn distribution is imported as "sklearn"
from sqlalchemy import create_engine

# Linear regression estimator; it is fitted on the training sample further down.
model = LinearRegression()

Connect to the GHZ database

import os
from urllib.parse import quote_plus

# Connection settings for the GHZ database. Each one can be overridden with an
# environment variable so credentials need not be edited into the source; the
# defaults reproduce the original hard-coded values.
server = os.environ.get("GHZ_SERVER", "mssql-82792-0.cloudclusters.net:16272")
username = os.environ.get("GHZ_USERNAME", "user")
password = os.environ.get("GHZ_PASSWORD", "RiceOwls1912")
database = os.environ.get("GHZ_DATABASE", "ghz")

# URL layout: dialect+driver://user:password@host:port/database.
# The user name and password are percent-encoded so characters such as "@",
# ":" or "/" in an overridden credential cannot corrupt the URL. With the
# default values the resulting string is unchanged.
string = (
    f"mssql+pymssql://{quote_plus(username)}:{quote_plus(password)}"
    f"@{server}/{database}"
)

# The connection is left open because the query below reads through it.
conn = create_engine(string).connect()

Get data

# Read the five columns ordered by date and ticker, discard every row that has
# a missing value, and index the result by (date, ticker).
df = (
    pd.read_sql(
        sql="""
    select date, ticker, bm, roeq, ret
    from data
    order by date, ticker
    """,
        con=conn,
    )
    .dropna()
    .set_index(["date", "ticker"])
)

Split into train and test

# Boolean mask over the rows: True where the "date" index level compares
# greater than or equal to "2010-01".
later = df.index.get_level_values("date") >= "2010-01"

# Rows before the cutoff form the training sample; the rest are held out.
train, test = df.loc[~later], df.loc[later]

Train linear model

# Predictor columns used by the regression.
features = ["bm", "roeq"]

# Predictors and target ("ret") taken from the training sample.
Xtrain, ytrain = train[features], train["ret"]

model.fit(X=Xtrain, y=ytrain)

Regression coefficients

# Show the fitted intercept, then the slope estimates labelled by feature name.
print("intercept =", model.intercept_)

coefs = pd.Series(data=model.coef_, index=features)
# Bare expression: displayed only when run interactively (e.g. as the last
# line of a notebook cell).
coefs
intercept = 0.01013452166825877
bm      0.000078
roeq    0.000027
dtype: float64

Predict

# Predictor columns of the held-out sample.
Xtest = test[features]

# Wrap the model's predictions in a Series that carries the test sample's
# (date, ticker) index, so they can be grouped by date and joined back later.
ypredict = pd.Series(data=model.predict(Xtest), index=test.index)

Sort

def cut(x):
    """Bin the values of x into five quantile-based groups labelled 1 to 5."""
    return pd.qcut(x, q=5, labels=range(1, 6))


# Sort the predictions into quintiles separately within each date.
quintiles = ypredict.groupby("date", group_keys=False).apply(cut)
quintiles.name = "quintile"

Compute returns

# Attach each observation's quintile label to the test sample.
test = test.join(quintiles)

# Mean of "ret" for every (date, quintile) pair, reshaped so that dates are
# rows and quintiles are columns.
rets = (
    test.reset_index()
    .groupby(["date", "quintile"])["ret"]
    .mean()
    .unstack()
)

# First few dates, followed by the average over all dates for each quintile.
print(rets.head())
print(rets.mean())
quintile         1         2         3         4         5
date                                                      
2010-01  -0.018635 -0.029396 -0.019440 -0.011638  0.055819
2010-02   0.036299  0.040267  0.038363  0.039892  0.040899
2010-03   0.077513  0.064601  0.084102  0.071667  0.104047
2010-04   0.068089  0.047461  0.054447  0.085994  0.138964
2010-05  -0.085651 -0.066710 -0.072764 -0.077961 -0.102800
quintile
1    0.011773
2    0.012077
3    0.012061
4    0.011726
5    0.015664
dtype: float64