Note
Go to the end to download the full example code or to run this example in your browser via Binder
regression
import site
site.addsitedir("D:\\mytools\\AI4Water")
from ai4water.datasets import busan_beach
from skopt.plots import plot_objective
from autotab import OptimizePipeline
# Load the Busan beach dataset provided by ai4water and print its shape.
data = busan_beach()
print(data.shape)
/home/docs/checkouts/readthedocs.org/user_builds/autotab/envs/dev/lib/python3.7/site-packages/sklearn/experimental/enable_hist_gradient_boosting.py:17: UserWarning: Since version 1.0, it is not needed to import enable_hist_gradient_boosting anymore. HistGradientBoostingClassifier and HistGradientBoostingRegressor are now stable and can be normally imported from sklearn.ensemble.
"Since version 1.0, "
(1446, 14)
# Preview the first rows of the dataset.
print(data.head())
tide_cm wat_temp_c ... rel_hum tetx_coppml
index ...
2018-06-19 00:00:00 36.407149 19.321232 ... 95.000000 NaN
2018-06-19 00:30:00 35.562515 19.320124 ... 95.000000 NaN
2018-06-19 01:00:00 34.808016 19.319666 ... 95.000000 NaN
2018-06-19 01:30:00 30.645216 19.320406 ... 95.006667 NaN
2018-06-19 02:00:00 26.608980 19.326729 ... 95.006667 NaN
[5 rows x 14 columns]
# Keyword arguments passed to ``OptimizePipeline`` below. Every column of
# ``data`` except the last is treated as an input; the last column is the
# output (target).
kws = {
    "inputs_to_transform": data.columns.tolist()[:-1],
    "outputs_to_transform": data.columns.tolist()[-1:],
    "parent_iterations": 100,
    # NOTE(review): the previous comment on this entry said that
    # hyperparameters are not optimized for this demonstration, yet 20 child
    # iterations are requested -- confirm which one is intended.
    "child_iterations": 20,
    "parent_algorithm": "bayes",
    "child_algorithm": "bayes",
    "eval_metric": "rmse",
    "cv_parent_hpo": True,
    "cv_child_hpo": True,
    "cross_validator": {"KFold": {"n_splits": 5}},
    "monitor": ["r2", "r2_score"],
    # candidate estimators, referred to by name
    "models": [
        "LinearRegression",
        "Lasso",
        "RandomForestRegressor",
        "HistGradientBoostingRegressor",
        "CatBoostRegressor",
        "XGBRegressor",
        "LGBMRegressor",
        "GradientBoostingRegressor",
        "ExtraTreeRegressor",
        "ExtraTreesRegressor",
    ],
    "input_features": data.columns.tolist()[:-1],
    "output_features": data.columns.tolist()[-1:],
    "split_random": True,
    "seed": 2809,
}
# Build the pipeline from ``kws`` and configure it inside its context manager.
# The statements below must be indented to form the body of the ``with``
# statement; without indentation the script is not valid Python.
with OptimizePipeline(**kws) as pl:
    # presumably excludes 'box-cox' from the candidate transformations --
    # confirm against the autotab documentation
    pl.remove_transformation('box-cox')
    # NOTE(review): this assigns to a private attribute; it looks like the
    # list of post-processing plots to produce -- confirm
    pl._pp_plots = ["regression", "prediction", "residual", "edf"]
# pl.change_transformation_behavior('yeo-johnson', {'pre_center': True})
# results = pl.fit(data=data, process_results=False)
# plot the convergence plot to illustrate how much improvement occurred w.r.t.
# the evaluation metric
# pl.plot_convergence(save=False)
#
# # %%
# pl.plot_convergence(save=False, original=True)
#
# ##############################################
#
# # show searched space
#
# pl.optimizer_._plot_parallel_coords(figsize=(16, 8), save=False)
#
# ##############################################
#
# pl.optimizer_._plot_distributions(save=False)
#
# ##############################################
#
# pl.optimizer_.plot_importance(save=False)
#
# ###########################################
#
# # plot first-order and second-order partial dependence plots of the Gaussian process
# _ = plot_objective(results)
#
# ###########################################
#
# pl.optimizer_._plot_evaluations(save=False)
#
# ###########################################
#
# pl.optimizer_._plot_edf(save=False)
#
# ##############################################
#
# pl.bfe_all_best_models(data=data)
#
# ##############################################
#
# pl.dumbbell_plot(data=data, save=False, upper_limit=1e15)
#
# ##############################################
#
# pl.dumbbell_plot(data=data, metric_name='r2', save=False)
#
# ##############################################
#
# pl.taylor_plot(data=data, save=False)
#
# ##############################################
#
# pl.compare_models()
#
# ##############################################
#
# # compare the performance of models
# pl.compare_models(plot_type="bar_chart")
#
# ##############################################
#
# # compare the performance of models w.r.t R2
# pl.compare_models("r2", plot_type="bar_chart")
#
# # %%
# model = pl.be_best_model_from_config(data=data, metric_name="r2_score")
#
# # %%
# model.evaluate_on_test_data(data=data, metrics="r2_score")
# # %%
# model = pl.bfe_best_model_from_scratch(metric_name='r2_score', data=data, verbosity=0)
#
# # %%
# model.evaluate_on_training_data(data=data, metrics="r2_score")
#
# # %%
# model.evaluate_on_test_data(data=data, metrics="r2_score")
#
# #################################################
#
# print(f"all results are save in {pl.path} folder")
#
# #################################################
#
# # remove all the files/folders which are no longer required.
# pl.cleanup()
Total running time of the script: ( 0 minutes 3.190 seconds)