# Experiment Management

## Introduction
This guide shows how to track experiments with MLflow and how to serve models.

In [None]:
# %%capture
# !pip install -U onetick-ml

In [None]:
from onetick import ml
import onetick.py as otp
# Show the versions of the imported `ml` and `otp` packages as the cell output.
('otml:', ml.__version__, ' otp:', otp.__version__)

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import yaml


# Time range passed to the datafeed of the experiment defined below.
# Assuming `otp.dt` follows the usual (year, month, day, hour, minute) argument
# order, this is 2022-05-10 09:30 through 2022-11-10 16:00 -- the timezone is
# not specified here; confirm which one applies.
start = otp.dt(2022, 5, 10, 9, 30)
end = otp.dt(2022, 11, 10, 16, 0)


class VolumePrediction(ml.Experiment):
    """Experiment that predicts ``VOLUME`` from lagged ``VOLUME`` values.

    The configuration is declarative: each class attribute describes one
    stage of the experiment (data, split, feature pipeline, model,
    evaluation).
    """

    # Column to predict and the pattern selecting the feature columns.
    # NOTE(review): the pattern presumably matches the lag columns produced
    # by ``CalcLags`` in ``pipeline`` -- confirm the generated column names.
    target_columns = ["VOLUME"]
    features_columns = ["VOLUME_LAG_.*"]

    # DATA
    datafeeds = [
        ml.OneTickBarsDatafeed(
            db="NYSE_TAQ_BARS", tick_type="TRD_1M", symbols=["SPY"],
            start=start, end=end,
            # presumably the bar size in seconds (10-minute bars) -- TODO confirm
            bucket=600,
        ),
    ]

    # 15% of the data for test and 15% for validation; the remainder is
    # presumably used for training -- confirm against the splitter docs.
    splitters = [ml.PercentageSplitter(test_size=0.15, val_size=0.15)]

    # Lagged copies of VOLUME serve as features.
    # NOTE(review): lags 39 and 40 presumably correspond to roughly one
    # trading day back when bars are 10 minutes long -- confirm.
    pipeline = [ml.CalcLags(periods=[1, 2, 3, 39, 40], columns=["VOLUME"])]

    # MODEL
    models = [ml.LGBMRegressor()]

    train_params = {"verbose": 0}

    # EVALUATION
    evaluators = [
        ml.MAPEEvaluator(),
        ml.MAEEvaluator(),
        ml.RMSEEvaluator(),
        ml.R2Evaluator(),
    ]

## Run the full experiment cycle and get the resulting metrics and predictions

In [None]:
exp = VolumePrediction()
# Keep the serialized configuration: it is used later in this guide to rebuild
# the experiment with `ml.build_experiment(config)`.
config = exp.serialize_config()
# Run the whole experiment; `run()` returns the metrics and the predictions.
metrics, predictions = exp.run()

In [None]:
# Display `x_test` of the experiment (presumably the features of the test split -- confirm).
exp.x_test

In [None]:
# Show the metrics returned by `exp.run()` as a one-row table.
pd.DataFrame([metrics])

## Local saving and loading models
You can save the trained model by calling the experiment's `save_model()` method:

In [None]:
# Save the trained model to a local file.
# NOTE(review): `.cbm` is the extension commonly associated with CatBoost
# models, while this experiment trains `ml.LGBMRegressor`. The name is
# presumably arbitrary -- confirm, and keep it identical to the path that is
# later passed to `load_model()`.
exp.save_model('./model.cbm')

# Recompute and display the metrics of the current experiment.
metrics = exp.calc_metrics()
metrics

To restore a model, we first initialize the experiment and prepare the data, and then call `load_model()` instead of going through the model training stage.

In [None]:
# Rebuild the experiment from the configuration serialized earlier
# (`config = exp.serialize_config()`).
exp = ml.build_experiment(config)

# data stage
exp.get_data()
exp.prepare_data()

# model load stage: load the saved file into a fresh LGBMRegressor instead of
# training; the path must match the one given to `save_model()`
model = exp.load_model(ml.LGBMRegressor(), './model.cbm')

# evaluate stage: predict with the loaded model, then compute and display the metrics
predictions = exp.predict(model=model)
metrics = exp.calc_metrics()
metrics

## MLflow usage

### Save experiment to MLflow
The following keys of the experiment's `general` dictionary configure MLflow tracking:
- `log_models` : `bool` – enable logging of the trained model.
- `experiment_name` : `str` – the name of the experiment.
- `mlflow_url` : `str` – MLFlow tracking URL used to log parameters, metrics and artifacts.

After running the whole cycle of an experiment, you can save it to MLflow by calling the `.save_mlflow_run()` method, which returns the ID of the saved run.

In [None]:
class MLFlowLoggedExperiment(VolumePrediction):
    """``VolumePrediction`` with MLflow tracking settings supplied via ``general``."""

    general = {
        # Also log the trained model.
        'log_models': True,
        # Name of the experiment.
        'experiment_name': 'example-experiment',
        # MLflow tracking URL; replace with the address of your own server.
        'mlflow_url': 'http://172.16.1.89:5000/',
    }

experiment = MLFlowLoggedExperiment()
metrics, predictions = experiment.run()
# Save the finished run to MLflow; the returned run id is what
# `ml.restore_experiment_from_mlflow()` needs to restore the experiment later.
run_id = experiment.save_mlflow_run()

metrics

### Restore experiment from MLflow
We pass the `run_id` produced in the previous step to the `restore_experiment_from_mlflow()` utility function, which reconstructs the experiment and restores the trained model.

In [None]:
# Load experiment from MLflow, using the same tracking URL the run was saved to
# and the `run_id` returned by `save_mlflow_run()`.
experiment = ml.restore_experiment_from_mlflow(mlflow_url='http://172.16.1.89:5000/',
                                               run_id=run_id)

# data stage
experiment.get_data()
experiment.prepare_data()

# we can skip the model stage and go directly to prediction and metrics calculation
predictions = experiment.predict()
metrics = experiment.calc_metrics()

# metrics are the same as in the previous step
metrics
