Kerry Back
# Index rows by (date, ticker) so the date level of the index can be used
# to select the training and prediction windows below.
df = df.set_index(["date", "ticker"])
# Window boundaries: each consecutive pair is consumed as (train_date, end_date).
# NOTE(review): "3000-01" looks like a far-future sentinel that leaves the
# final prediction window open-ended -- confirm.
dates = ["2005-01", "2010-01", "2015-01", "2020-01", "3000-01"]
for train_date, end_date in zip(dates[:-1], dates[1:]):
    train on all data dated before train_date
    predict for dates from train_date up to, but not including, end_date
    store the predictions in a (date, ticker)-indexed series
df["predict"] = predictions
# Walk-forward prediction: for each consecutive (train_date, end_date) pair,
# fit `pipe` on every row dated strictly before train_date, then predict the
# rows dated in [train_date, end_date). The per-window predictions are
# combined into one (date, ticker)-indexed Series and stored in df["predict"].

# The date level of the index does not change inside the loop; fetch it once.
date_level = df.index.get_level_values("date")

# Collect each window's predictions and concatenate once at the end, instead
# of re-concatenating (and re-copying) the accumulated Series on every pass.
pieces = []
for train_date, end_date in zip(dates[:-1], dates[1:]):
    fltr1 = date_level < train_date  # rows available for training
    fltr2 = date_level < end_date    # rows before the end of this window
    train = df[fltr1]
    test = df[~fltr1 & fltr2]        # train_date <= date < end_date
    Xtrain = train[features]
    ytrain = train["ret"]
    Xtest = test[features]
    pipe.fit(Xtrain, ytrain)
    pred = pipe.predict(Xtest)
    # Re-attach the (date, ticker) index so the predictions can be aligned
    # with df when they are assigned as a column.
    pred = pd.Series(pred, index=test.index)
    pieces.append(pred)

# With no windows at all, keep `predictions` as None, as it was initialised
# in the original code.
predictions = pd.concat(pieces) if pieces else None
# Rows dated before the first train_date fall in no prediction window, so
# they get no predicted value here.
df["predict"] = predictions