Analytics
Scalecast helps you forecast time series. Here is how to initiate its main object:
from scalecast.Forecaster import Forecaster
f = Forecaster(
y = array_of_values,
current_dates = array_of_dates,
future_dates=fcst_horizon_length,
test_length = 0, # do you want to test all models? if so, on how many or what percent of observations?
cis = False, # evaluate conformal confidence intervals for all models?
metrics = ['rmse','mape','mae','r2'], # what metrics to evaluate over the validation/test sets?
)
Uniform ML modeling (with models from a diverse set of libraries, including scikit-learn, statsmodels, and tensorflow), reporting, and data visualizations are offered through the `Forecaster` and `MVForecaster` interfaces. Data storage and processing then become easy, as all applicable data, predictions, and many derived metrics are contained in a few objects, with much customization available through different modules. Feature requests and issue reporting are welcome! Don't forget to leave a star! ⭐
f.set_estimator('lstm')
f.manual_forecast(
lags=36,
batch_size=32,
epochs=15,
validation_split=.2,
activation='tanh',
optimizer='Adam',
learning_rate=0.001,
lstm_layer_sizes=(100,)*3,
dropout=(0,)*3,
)
f.auto_Xvar_select( # iterate through different combinations of covariates
estimator = 'lasso', # what estimator?
alpha = .2, # estimator hyperparams?
monitor = 'ValidationMetricValue', # what metric to monitor to make decisions?
cross_validate = True, # cross validate
cvkwargs = {'k':3}, # 3 folds
)
from scalecast import GridGenerator
GridGenerator.get_example_grids()
models = ['ridge','lasso','xgboost','lightgbm','knn']
f.tune_test_forecast(
models,
limit_grid_size = .2,
feature_importance = True, # save pfi feature importance for each model?
cross_validate = True, # cross validate? if False, uses a separate validation set that the user can specify
rolling = True, # rolling time series cross validation?
k = 3, # how many folds?
)
import matplotlib.pyplot as plt
fig, ax = plt.subplots(2,1, figsize = (12,6))
f.plot_test_set(models=models,order_by='TestSetRMSE',ax=ax[0])
f.plot(models=models,order_by='TestSetRMSE',ax=ax[1])
plt.show()
from scalecast import GridGenerator
from scalecast.Pipeline import Transformer, Reverter, Pipeline
from scalecast.util import find_optimal_transformation, backtest_metrics
def forecaster(f):
    """Tune, test, and forecast a fixed list of models on ``f``.

    Used as the 'Forecast' step of the ``Pipeline`` built below. All work is
    delegated to ``f.tune_test_forecast``; nothing is returned.

    Args:
        f: The object to forecast with. It must expose a
            ``tune_test_forecast`` method.
    """
    models = ['ridge','lasso','xgboost','lightgbm','knn']
    f.tune_test_forecast(
        models,
        limit_grid_size = .2, # randomized grid search on 20% of original grid sizes
        feature_importance = True, # save pfi feature importance for each model?
        cross_validate = True, # cross validate? if False, uses a separate validation set that the user can specify
        rolling = True, # rolling time series cross validation?
        k = 3, # how many folds?
    )
transformer, reverter = find_optimal_transformation(f) # just one of several ways to select transformations for your series
pipeline = Pipeline(
steps = [
('Transform',transformer),
('Forecast',forecaster),
('Revert',reverter),
]
)
f = pipeline.fit_predict(f)
backtest_results = pipeline.backtest(f)
metrics = backtest_metrics(backtest_results)
Models can be stacked with the `StackingRegressor` from scikit-learn or using scalecast's own stacking procedure.
from scalecast.auxmodels import auto_arima
f.set_estimator('lstm')
f.manual_forecast(
lags=36,
batch_size=32,
epochs=15,
validation_split=.2,
activation='tanh',
optimizer='Adam',
learning_rate=0.001,
lstm_layer_sizes=(100,)*3,
dropout=(0,)*3,
)
f.set_estimator('prophet')
f.manual_forecast()
auto_arima(f)
# stack previously evaluated models
f.add_signals(['lstm','prophet','arima'])
f.set_estimator('catboost')
f.manual_forecast()
from scalecast.MVForecaster import MVForecaster
from scalecast.Pipeline import MVPipeline
from scalecast.util import find_optimal_transformation, backtest_metrics
from scalecast import GridGenerator
GridGenerator.get_mv_grids()
def mvforecaster(mvf):
    """Tune, test, and forecast a fixed list of models on ``mvf``.

    Used as the 'Forecast' step of the ``MVPipeline`` built below. All work
    is delegated to ``mvf.tune_test_forecast``; nothing is returned.

    Args:
        mvf: The multivariate object to forecast with. It must expose a
            ``tune_test_forecast`` method.
    """
    models = ['ridge','lasso','xgboost','lightgbm','knn']
    mvf.tune_test_forecast(
        models,
        limit_grid_size = .2, # randomized grid search on 20% of original grid sizes
        cross_validate = True, # cross validate? if False, uses a separate validation set that the user can specify
        rolling = True, # rolling time series cross validation?
        k = 3, # how many folds?
    )
mvf = MVForecaster(f1,f2,f3) # can take N Forecaster objects
transformer1, reverter1 = find_optimal_transformation(f1)
transformer2, reverter2 = find_optimal_transformation(f2)
transformer3, reverter3 = find_optimal_transformation(f3)
pipeline = MVPipeline(
steps = [
('Transform',[transformer1,transformer2,transformer3]),
('Forecast',mvforecaster),
('Revert',[reverter1,reverter2,reverter3])
]
)
f1, f2, f3 = pipeline.fit_predict(f1, f2, f3)
backtest_results = pipeline.backtest(f1, f2, f3)
metrics = backtest_metrics(backtest_results)
Train a model in one `Forecaster` object and use that model to make predictions on the data in a separate `Forecaster` object.
f = Forecaster(...)
# infer_apply_Xvar_selection is called below but was never imported in this example
from scalecast.util import infer_apply_Xvar_selection
f.auto_Xvar_select()
f.set_estimator('xgboost')
f.cross_validate()
f.auto_forecast()
f_new = Forecaster(...) # different series than f
f_new = infer_apply_Xvar_selection(infer_from=f,apply_to=f_new)
f_new.transfer_predict(transfer_from=f,model='xgboost') # transfers the xgboost model from f to f_new
pip install --upgrade scalecast
pip install tensorflow (for RNN/LSTM on Windows) or pip install tensorflow-macos (for MAC/M1)
pip install darts
pip install prophet
pip install greykite (for the silverkite model)
pip install kats (changepoint detection)
pip install pmdarima (auto arima)
pip install tqdm (progress bar for notebook)
pip install ipython (widgets for notebook)
pip install ipywidgets (widgets for notebook)
jupyter nbextension enable --py widgetsnbextension (widgets for notebook)
jupyter labextension install @jupyter-widgets/jupyterlab-manager (widgets for Lab)
Scalecast: Machine Learning & Deep Learning
@misc{scalecast,
title = {{scalecast}},
author = {Michael Keith},
year = {2024},
version = {<your version>},
url = {https://scalecast.readthedocs.io/en/latest/},
}