ad/ad_model/ad_model.py

   1 # ==================================================================================
   2 #  Copyright (c) 2020 HCL Technologies Limited.
   3 #
   4 #  Licensed under the Apache License, Version 2.0 (the "License");
   5 #  you may not use this file except in compliance with the License.
   6 #  You may obtain a copy of the License at
   7 #
   8 #     http://www.apache.org/licenses/LICENSE-2.0
   9 #
  10 #  Unless required by applicable law or agreed to in writing, software
  11 #  distributed under the License is distributed on an "AS IS" BASIS,
  12 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 #  See the License for the specific language governing permissions and
  14 #  limitations under the License.
  15 # ==================================================================================
  16
  17 import hdbscan
  18 import pandas as pd
  19 import numpy as np
  20 import joblib
  21 import random
  22 import json
  23
  24 class modelling(object):
  25     def __init__(self,data):
  26         """ Separating UEID and timestamp features to be mapped later after prediction  """
  27         self.time = data.MeasTimestampRF
  28         self.id = data.UEID
  29         self.data = data.drop(['UEID', 'MeasTimestampRF'], axis = 1)
  30
  31     def predict(self, name):
  32         """
  33            Load the saved model and map the predicted category into Category field.
  34            Map UEID, MeasTimestampRF with the predicted result.
  35         """
  36         model = joblib.load('/tmp/ad/' + name)
  37         pred = model.predict(self.data)
  38         data = self.data.copy()
  39         le = joblib.load('/tmp/ad/LabelEncoder')
  40         data['Category'] = le.inverse_transform(pred)
  41         data['MeasTimestampRF'] = self.time
  42         data['UEID'] = self.id
  43         return data
  44
  45 def compare(df):
  46     """
  47      If the category of UEID is present in the segment file, it is considered as normal(0)
  48      otherwise, the sample is considered as anomaly.
  49     """
  50     with open("/tmp/ad/ue_seg.json", "r") as json_data:
  51         segment = json.loads(json_data.read())
  52     anomaly = []
  53     for i in df.index:
  54         if df.loc[i, 'Category'] in segment[str(df.loc[i,'UEID'])]:
  55             anomaly.append(0)
  56         else:
  57             anomaly.append(1)
  58     return anomaly
  59
  60 def HDB_PREDICT(df):
  61     """
  62         Extract all the unique UEID
  63         Call Predict method to get the final data for the randomly selected UEID
  64     """
  65     ue_list = df.UEID.unique()  # Extract unique UEIDs
  66     ue = random.choice(ue_list) # Randomly selected the ue list
  67     df = df[df['UEID'] == ue]
  68     db = modelling(df)
  69     db_df = db.predict('RF')# Calls predict module and store the result into db_df
  70     del db
  71
  72     db_df['Anomaly'] = compare(db_df)
  73     return db_df