Note

Go to the end to download the full example code or to run this example in your browser via Binder

regression

import site
# NOTE(review): hard-coded, machine-specific Windows path to a local AI4Water
# checkout — confirm whether it is still needed when ai4water is installed
# as a regular package.
site.addsitedir("D:\\mytools\\AI4Water")
from ai4water.datasets import busan_beach
from skopt.plots import plot_objective
from autotab import OptimizePipeline

# Load the dataset returned by ai4water's ``busan_beach`` helper.
data = busan_beach()

# Report the (rows, columns) shape of the loaded data.
print(data.shape)

/home/docs/checkouts/readthedocs.org/user_builds/autotab/envs/dev/lib/python3.7/site-packages/sklearn/experimental/enable_hist_gradient_boosting.py:17: UserWarning: Since version 1.0, it is not needed to import enable_hist_gradient_boosting anymore. HistGradientBoostingClassifier and HistGradientBoostingRegressor are now stable and can be normally imported from sklearn.ensemble.
  "Since version 1.0, "
(1446, 14)

# Preview the first rows of the dataset.
print(data.head())

                       tide_cm  wat_temp_c  ...    rel_hum  tetx_coppml
index                                       ...
2018-06-19 00:00:00  36.407149   19.321232  ...  95.000000          NaN
2018-06-19 00:30:00  35.562515   19.320124  ...  95.000000          NaN
2018-06-19 01:00:00  34.808016   19.319666  ...  95.000000          NaN
2018-06-19 01:30:00  30.645216   19.320406  ...  95.006667          NaN
2018-06-19 02:00:00  26.608980   19.326729  ...  95.006667          NaN

[5 rows x 14 columns]

# Keyword arguments that are unpacked into ``OptimizePipeline`` below.
# Every column of ``data`` except the last one is used as an input; the last
# column alone is used as the output.
kws = {
    "inputs_to_transform": data.columns.tolist()[:-1],
    "outputs_to_transform": data.columns.tolist()[-1:],
    "parent_iterations": 100,
    # NOTE(review): the original comment here read "don't optimize
    # hyperparameters, only for demonstration", yet 20 child iterations are
    # requested — confirm which of the two is intended.
    "child_iterations": 20,
    "parent_algorithm": "bayes",
    "child_algorithm": "bayes",
    "eval_metric": "rmse",
    "cv_parent_hpo": True,
    "cv_child_hpo": True,
    "cross_validator": {"KFold": {"n_splits": 5}},
    "monitor": ["r2", "r2_score"],
    "models": [
        "LinearRegression",
        "Lasso",
        "RandomForestRegressor",
        "HistGradientBoostingRegressor",
        "CatBoostRegressor",
        "XGBRegressor",
        "LGBMRegressor",
        "GradientBoostingRegressor",
        "ExtraTreeRegressor",
        "ExtraTreesRegressor",
    ],
    "input_features": data.columns.tolist()[:-1],
    "output_features": data.columns.tolist()[-1:],
    "split_random": True,
    "seed": 2809,
}

# Build the pipeline from ``kws`` inside a context manager. Only configuration
# happens here, because the ``fit`` call below is commented out.
with OptimizePipeline(**kws) as pl:
    # Presumably excludes the 'box-cox' transformation from the ones the
    # pipeline may choose — confirm against the autotab documentation.
    pl.remove_transformation('box-cox')
    # NOTE(review): assigns a private attribute directly; presumably selects
    # which post-processing plots are produced — confirm against autotab.
    pl._pp_plots = ["regression", "prediction", "residual", "edf"]

#    pl.change_transformation_behavior('yeo-johnson', {'pre_center': True})

#    results = pl.fit(data=data, process_results=False)

# Plot the convergence plot to illustrate how much improvement occurred
# with respect to the evaluation metric.

# pl.plot_convergence(save=False)
#
# # %%
# pl.plot_convergence(save=False, original=True)
#
# ##############################################
#
# # show searched space
#
# pl.optimizer_._plot_parallel_coords(figsize=(16, 8), save=False)
#
# ##############################################
#
# pl.optimizer_._plot_distributions(save=False)
#
# ##############################################3
#
# pl.optimizer_.plot_importance(save=False)
#
# ###########################################
#
# # plot first-order and second-order partial dependence plots of the Gaussian process
# _ = plot_objective(results)
#
# ###########################################
#
# pl.optimizer_._plot_evaluations(save=False)
#
# ###########################################
#
# pl.optimizer_._plot_edf(save=False)
#
# ##############################################
#
# pl.bfe_all_best_models(data=data)
#
# ##############################################
#
# pl.dumbbell_plot(data=data, save=False, upper_limit=1e15)
#
# ##############################################
#
# pl.dumbbell_plot(data=data, metric_name='r2', save=False)
#
# ##############################################
#
# pl.taylor_plot(data=data, save=False)
#
# ##############################################
#
# pl.compare_models()
#
# ##############################################
#
# # compare the performance of models
# pl.compare_models(plot_type="bar_chart")
#
# ##############################################
#
# # compare the performance of models w.r.t R2
# pl.compare_models("r2", plot_type="bar_chart")
#
# # %%
# model = pl.be_best_model_from_config(data=data, metric_name="r2_score")
#
# # %%
# model.evaluate_on_test_data(data=data, metrics="r2_score")

## %%
# model = pl.bfe_best_model_from_scratch(metric_name='r2_score', data=data, verbosity=0)
#
# # %%
# model.evaluate_on_training_data(data=data, metrics="r2_score")
#
# # %%
# model.evaluate_on_test_data(data=data, metrics="r2_score")
#
# #################################################
#
# print(f"all results are save in {pl.path} folder")
#
# #################################################
#
# # remove all the files/folders which are no longer required.
# pl.cleanup()

Total running time of the script: ( 0 minutes 3.190 seconds)

Gallery generated by Sphinx-Gallery