# ==================================================================================
#  Copyright (c) 2020 HCL Technologies Limited.
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
# ==================================================================================

import joblib
from ad_model.processing import PREPROCESS
from sklearn.metrics import f1_score
from sklearn.ensemble import IsolationForest
from database import DATABASE, DUMMY
import numpy as np


class modelling(object):
    r""" The modelling class takes input as dataframe or array and train Isolation Forest model

    Paramteres
    .........
    data: DataFrame or array
        input dataset
    cols: list
        list of parameters in input dataset

    Attributes
    ----------
    actual:array
        actual label for test data
    X: DataFrame or array
        transformed values of input data
    """
    def __init__(self, data):
        self.data = data
        self.cols = data.columns

    def read_test(self, db):
        """ Read test dataset for model validation"""

        db.read_data('valid')
        test = db.data
        self.actual = test['Anomaly']
        X = test[self.cols]
        sc = joblib.load('scale')
        self.X = sc.transform(X)

    def isoforest(self, outliers_fraction=0.05, random_state=42, push_model=False):
        """ Train isolation forest

        Parameters
        ----------
        outliers_fraction: float between 0.01 to 0.5 (default=0.05)
            percentage of anomalous available in input data
        push_model: boolean (default=False)
            return f_1 score if True else push model into repo
        random_state: int (default=42)
        """
        iso = IsolationForest(contamination=outliers_fraction, random_state=random_state)
        md = iso.fit(self.data, None)
        if push_model:
            joblib.dump(self.cols, 'params')
            joblib.dump(md, 'model')
        return test(self, md)


def train(thread=False):
    """
     Main function to perform training on input data
    """
    if thread:
        db = DUMMY()
    else:
        db = DATABASE('UEData')
    db.read_data('train')
    ps = PREPROCESS(db.data)
    ps.process()
    df = ps.data

    mod = modelling(df)
    mod.read_test(db)

    scores = []
    for of in np.arange(0.01, 0.4, 0.01):
        scores.append(mod.isoforest(outliers_fraction=of))
    opt_f1 = scores.index(max(scores)) + 1
    mod.isoforest(outliers_fraction=opt_f1*0.01, push_model=True)
    print("Optimum value of contamination : {}".format(opt_f1*0.01))
    print('Training Ends : ')


def test(self, model):
    pred = model.predict(self.X)
    if -1 in pred:
        pred = [1 if p == -1 else 0 for p in pred]
    return f1_score(self.actual, pred)