Source code for autotab.utils


__all__ = ["Callbacks", "data_to_csv", "data_to_h5",
           "EarlyStopperMinImp", "DeltaYStopper"]


import numpy as np
import pandas as pd

# Use skopt's EarlyStopper as the base class when skopt is importable.
# Otherwise define an empty placeholder with the same name so that the
# stopper classes in this module can still be declared without skopt.
try:
    from skopt.callbacks import EarlyStopper
except ModuleNotFoundError:
    class EarlyStopper(object): pass


class Callbacks(object):
    """Collection of hooks executed at fixed points of the parent loop.

    Every hook defined here does nothing and returns ``None``. Override
    the relevant methods in a subclass to attach custom behaviour.
    """

    def on_build_begin(self, model, **model_kwargs) -> None:
        """Called before the ``build`` method of the parent loop."""
        return

    def on_build_end(self, model, **model_kwargs) -> None:
        """Called at the end of the ``build`` method of the parent loop."""
        return

    def on_fit_begin(self, x=None, y=None, validation_data=None) -> None:
        """Called before the ``fit`` method of the parent loop.

        This callback does not run when cross validation is used.
        For that, consider using ``on_cross_val_begin``.
        """
        return

    def on_fit_end(self, x=None, y=None, validation_data=None) -> None:
        """Called at the end of the ``fit`` method of the parent loop.

        This callback does not run when cross validation is used.
        For that, consider using ``on_cross_val_end``.
        """
        return

    def on_eval_begin(self, model, iter_num=None, x=None, y=None,
                      validation_data=None) -> None:
        """Called before the ``evaluate`` method of the parent loop."""
        return

    def on_eval_end(self, model, iter_num=None, x=None, y=None,
                    validation_data=None) -> None:
        """Called at the end of the ``evaluate`` method of the parent loop."""
        return

    def on_cross_val_begin(self, model, iter_num=None, x=None, y=None,
                           validation_data=None) -> None:
        """Called at the start of cross validation."""
        return

    def on_cross_val_end(self, model, iter_num=None, x=None, y=None,
                         validation_data=None) -> None:
        """Called at the end of cross validation."""
        return
class DeltaYStopper(EarlyStopper):
    """Stop optimization once the best objective value stops changing.

    On every call the smallest non-NaN entry of ``result.func_vals`` is
    compared with the best value remembered so far. When the absolute
    difference exceeds ``min_val_loss`` the remembered value and the
    iteration at which it was seen are updated. Stopping is signalled
    when more than ``patience`` calls have passed since the last update.

    Parameters
    ----------
    min_val_loss :
        Minimum absolute change in the best objective value that counts
        as a change.
    patience :
        Number of calls without such a change that is tolerated.
    """

    def __init__(self, min_val_loss, patience):
        super(DeltaYStopper, self).__init__()
        self.min_val_loss = min_val_loss
        self.patience = patience
        self.counter = 0
        # Not read anywhere in this class; kept so that external code
        # accessing the attribute keeps working.
        self.wait = 0
        # Large starting value so that the first observation registers
        # as a change.
        self.best = 999999999999
        self.best_iter = 0

    def _criterion(self, result):
        """Return True when optimization should stop, otherwise False."""
        self.counter += 1

        current_min = np.nanmin(result.func_vals)
        if abs(current_min - self.best) > self.min_val_loss:
            self.best_iter = self.counter
            self.best = current_min

        if self.counter - self.best_iter > self.patience:
            print(f'early stopping at {self.counter}')
            return True
        return False


class EarlyStopperMinImp(EarlyStopper):
    """
    Stops optimization if objective function does not show improvement
    after first `patience` iterations.

    Once at least ``patience`` calls have been made, stopping is
    signalled whenever the smallest non-NaN entry of
    ``result.func_vals`` is still greater than ``min_improvement``.

    Parameters
    ----------
    min_improvement :
        Threshold that the best objective value is compared against.
    patience :
        Number of calls before the check becomes active.
    """

    def __init__(self, min_improvement, patience):
        super(EarlyStopperMinImp, self).__init__()
        self.patience = patience
        self.min_improvement = min_improvement
        self.counter = 0

    def _criterion(self, result):
        """Return the stop decision, or None while still within patience."""
        self.counter += 1
        if self.counter >= self.patience:
            return np.nanmin(result.func_vals) > self.min_improvement
        return None


def data_to_h5(filepath, x, y, val_x=None, val_y=None, test_x=None, test_y=None):
    """Write training and, optionally, validation/test data to an HDF5 file.

    The file at ``filepath`` is created (or overwritten). Training data
    is stored in the group ``training_data``; validation and test data
    go to ``validation_data`` and ``test_data`` and are written only
    when ``val_x`` / ``test_x`` is not None.
    """
    import h5py

    # The context manager closes the file even if one of the writes fails.
    with h5py.File(filepath, mode='w') as f:
        _save_data_to_hdf5('training_data', x, y, f)

        if val_x is not None:
            _save_data_to_hdf5('validation_data', val_x, val_y, f)

        if test_x is not None:
            _save_data_to_hdf5('test_data', test_x, test_y, f)
    return


def _save_data_to_hdf5(data_type, x, y, f):
    """Saves one data_type in h5py. data_type is string indicating whether
    it is training, validation or test data.

    A group called ``data_type`` is created in the open file ``f`` and
    ``x`` and ``y`` are stored in it as datasets named ``x`` and ``y``.
    Both values must expose ``shape`` and ``dtype``.
    """
    assert x is not None
    group = f.create_group(data_type)

    for name, val in zip(['x', 'y'], [x, y]):
        param_dset = group.create_dataset(name, val.shape, dtype=val.dtype)
        if not val.shape:
            # scalar
            param_dset[()] = val
        else:
            param_dset[:] = val
    return


def data_to_csv(filepath: str, all_features: list, x, y):
    """Write inputs and targets side by side to a csv file.

    Parameters
    ----------
    filepath :
        Destination of the csv file.
    all_features :
        Column names for the concatenated ``x`` and ``y`` columns.
    x :
        Two-dimensional inputs. When None, an empty frame is written.
    y :
        Targets with the same number of rows as ``x``. A one-dimensional
        ``y`` is treated as a single column.
    """
    if x is None:
        pd.DataFrame().to_csv(filepath)
        return

    y = np.asarray(y)
    if y.ndim == 1:
        # np.concatenate along axis 1 needs 2-d operands.
        y = y.reshape(-1, 1)

    pd.DataFrame(np.concatenate([x, y], axis=1), columns=all_features).to_csv(filepath)
    return