ad/insert.py

   1 # ==================================================================================
   2 #  Copyright (c) 2020 HCL Technologies Limited.
   3 #
   4 #  Licensed under the Apache License, Version 2.0 (the "License");
   5 #  you may not use this file except in compliance with the License.
   6 #  You may obtain a copy of the License at
   7 #
   8 #     http://www.apache.org/licenses/LICENSE-2.0
   9 #
  10 #  Unless required by applicable law or agreed to in writing, software
  11 #  distributed under the License is distributed on an "AS IS" BASIS,
  12 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 #  See the License for the specific language governing permissions and
  14 #  limitations under the License.
  15 # ==================================================================================
  16
  17 """
   18 This module is a temporary means of pushing data into InfluxDB when the AD xApp starts. It will be deprecated in the future, once data arrives through KPIMON.
  19 """
  20
  21 import pandas as pd
  22 from influxdb import DataFrameClient
  23 import datetime
  24
  25
  26 class INSERTDATA:
  27
  28     def __init__(self):
  29         host = 'r4-influxdb.ricplt'
  30         self.client = DataFrameClient(host, '8086', 'root', 'root')
  31         self.dropdb('UEData')
  32         self.createdb('UEData')
  33
  34     def createdb(self, dbname):
  35         print("Create database: " + dbname)
  36         self.client.create_database(dbname)
  37         self.client.switch_database(dbname)
  38
  39     def dropdb(self, dbname):
  40         print("DROP database: " + dbname)
  41         self.client.drop_database(dbname)
  42
  43     def dropmeas(self, measname):
  44         print("DROP MEASUREMENT: " + measname)
  45         self.client.query('DROP MEASUREMENT '+measname)
  46
  47
  48 def explode(df):
  49     for col in df.columns:
  50         if isinstance(df.iloc[0][col], list) and col != 'neighbourCellList':
  51             df = df.explode(col)
  52         d = df[col].apply(pd.Series)
  53         if col in list(range(5)):
  54             d.columns = d.columns + '_' + str(col)
  55         elif 'nbCellRfReport_' in col:
  56             d.columns = d.columns + '_nb_' + col[-1]
  57         df[d.columns] = d
  58         df = df.drop(col, axis=1)
  59     return df
  60
  61
  62 def jsonToTable(df):
  63     df.index = range(len(df))
  64     cols = [col for col in df.columns if isinstance(df.iloc[0][col], dict) or isinstance(df.iloc[0][col], list)]
  65     if len(cols) == 0:
  66         return df
  67     for col in cols:
  68         d = explode(pd.DataFrame(df[col], columns=[col]))
  69         d = d.dropna(axis=1, how='all')
  70         df = pd.concat([df, d], axis=1)
  71         df = df.drop(col, axis=1).dropna()
  72     return jsonToTable(df)
  73
  74
  75 def time(df):
  76     df.index = pd.date_range(start=datetime.datetime.now(), freq='10ms', periods=len(df))
  77     df['measTimeStampRf'] = df['measTimeStampRf'].apply(lambda x: str(x))
  78     return df
  79
  80
  81 def populatedb():
  82     data = pd.read_csv('ad/valid.csv')
  83     data = time(data)
  84
  85     # inintiate connection and create database UEDATA
  86     db = INSERTDATA()
  87     db.client.write_points(data, 'valid')
  88     del data
  89
  90     df = pd.read_json('ad/ue.json.gz', lines=True)
  91     df = df[['ueMeasReport']].dropna()
  92     df = jsonToTable(df)
  93     df = time(df)
  94
  95     db.client.write_points(df, 'train', batch_size=500,  protocol='line')
  96     db.client.write_points(df, 'liveUE', batch_size=500, protocol='line')