{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# hyperparameter optimization using HyperOpt\n\nThere are two ways to optimize hyperparameters in AI4Water. The :py:class:`ai4water.hyperopt.HyperOpt`\nclass is the lower-level API, while :py:meth:`Model.optimize_hyperparameters` is\nthe higher-level API. To use the HyperOpt class, the user has to define the\nobjective function and the hyperparameter space explicitly. Moreover, the user has\nto instantiate the HyperOpt class and call the fit method on it.\n\nThis example shows how to use the HyperOpt class for optimization of hyperparameters.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "import os\nimport math\n\nimport numpy as np\n\nfrom skopt.plots import plot_objective\nfrom SeqMetrics import RegressionMetrics\n\nfrom ai4water.functional import Model\nfrom ai4water.datasets import busan_beach\nfrom ai4water.utils.utils import get_version_info\nfrom ai4water.utils.utils import jsonize, dateandtime_now\nfrom ai4water.hyperopt import HyperOpt, Categorical, Real, Integer\n\n# sphinx_gallery_thumbnail_number = 2\n\nfor k,v in get_version_info().items():\n    print(f\"{k} version: {v}\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "data = busan_beach()\n\nSEP = os.sep"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "PREFIX = f\"hpo_{dateandtime_now()}\"\nITER = 0"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Optimizing the hyperparameters usually involves four steps"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## 1) define objective function\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "def objective_fn(\n        prefix=None,\n        **suggestions)->float:\n    \"\"\"Build, train and evaluate one XGBRegressor candidate.\n\n    The value returned by this function is minimized by the optimization algorithm.\n\n    Parameters\n    ----------\n    prefix : str, optional\n        Passed to ``Model`` as ``prefix``; the global ``PREFIX`` is used\n        when it is None or empty.\n    **suggestions\n        Hyperparameters suggested for this iteration. They are passed through\n        ``jsonize`` and given to ``Model`` as the ``XGBRegressor`` configuration.\n\n    Returns\n    -------\n    float\n        ``1.0 - r2_score`` computed on the validation predictions, or ``1.0``\n        when the r2_score is not finite.\n    \"\"\"\n    global ITER\n\n    suggestions = jsonize(suggestions)\n\n    # build model\n    _model = Model(model={\"XGBRegressor\": suggestions},\n                  prefix=prefix or PREFIX,\n                  train_fraction=1.0,\n                  split_random=True,\n                  verbosity=0,\n                  )\n\n    # train model\n    _model.fit(data=data)\n\n    # evaluate model\n    t, p = _model.predict(data='validation', return_true=True, process_results=False)\n    r2 = RegressionMetrics(t, p).r2_score()\n\n    if math.isfinite(r2):\n        # since the optimization algorithm solves a minimization problem,\n        # we have to subtract r2_score from 1.0\n        # if our validation metric is something like mse or rmse,\n        # then we don't need to subtract it from 1.0\n        val_score = 1.0 - r2\n    else:\n        # a nan/inf score is not a usable fit and must be penalized:\n        # replacing r2 with 1.0 here would yield 1.0 - 1.0 = 0.0, i.e. the\n        # candidate would look like a perfect fit to the optimizer.\n        val_score = 1.0\n\n    ITER += 1\n\n    print(f\"{ITER} {val_score}\")\n\n    return val_score"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## 2) define parameter space\n The parameter space determines the pool of candidates from which\n hyperparameters will be chosen during optimization.\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# ``num_samples`` is shared by all numeric dimensions below\nnum_samples = 10\n\nspace = [\n    Integer(low=5, high=50, name='n_estimators', num_samples=num_samples),\n    # Maximum tree depth for base learners\n    Integer(low=3, high=10, name='max_depth', num_samples=num_samples),\n    Real(low=0.01, high=0.5, name='learning_rate', prior='log', num_samples=num_samples),\n    Categorical(categories=['gbtree', 'gblinear', 'dart'], name='booster'),\n]"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## 3) initial state\n this step is optional but it is always better to\n provide a good initial guess to the optimization algorithm\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "x0 = [5, 4, 0.1, \"gbtree\"]"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## 4) run optimization algorithm\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Now instantiate the HyperOpt class and call .fit on it\n# algorithm can be either ``random``, ``grid``, ``bayes``, ``tpe``, ``bayes_rf``\n#\n\noptimizer = HyperOpt(\n    algorithm=\"bayes\",\n    objective_fn=objective_fn,\n    param_space=space,\n    x0=x0,\n    num_iterations=25,\n    process_results=False,\n    opt_path=f\"results{SEP}{PREFIX}\",\n    verbosity=0,\n)\n\nresults = optimizer.fit()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "print(f\"optimized parameters are \\n{optimizer.best_paras()}\")\n\nprint(np.min(optimizer.func_vals()))"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## postprocessing of results\n save hyperparameters at each iteration\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "optimizer.save_iterations_as_xy()"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "plot convergence (the figure is not saved here, since ``save=False``)\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "optimizer._plot_convergence(save=False)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "optimizer._plot_parallel_coords(figsize=(14, 8), save=False)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "optimizer._plot_distributions(save=False)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "optimizer.plot_importance(save=False)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "_ = plot_objective(results)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "optimizer._plot_evaluations(save=False)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "optimizer._plot_edf(save=False)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Above, If you set ``process_results`` to True, all of the results are automatically\n# saved in the optimization directory.\n\n\nprint(f\"All the results are save in {optimizer.opt_path} directory\")"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.7.9"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}