[Issue-Id:RICAPP-204] Bump version to 1.0.0 and push to staging area

[ric-app/ad.git] / src / processing.py
diff --git a/ad/ad_model/processing.py b/src/processing.py

similarity index 80%

rename from ad/ad_model/processing.py

rename to src/processing.py

index 3a19dd1..b48f8bf 100644 (file)
--- a/ad/ad_model/processing.py
+++ b/src/processing.py
@@ -40,14 +40,17 @@ class PREPROCESS(object):
          """
             Columns that are not useful for the prediction will be dropped (UEID, Category, & Timestamp)
          """
-        self.temp = None
          self.data = data
+        self.convert_gb_to_mb()
  
      def variation(self):
          """ drop the constant parameters """
          if len(self.data) > 1:
              self.data = self.data.loc[:, self.data.apply(pd.Series.nunique) != 1]
  
+    def convert_gb_to_mb(self):
+        self.data.iloc[:]['DRB.UEThpDl'] = self.data['DRB.UEThpDl'].apply(lambda x: x*1024)
+
      def numerical_data(self):
          """  Filters only numeric data types """
          numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
@@ -66,28 +69,31 @@ class PREPROCESS(object):
  
      # check the skewness of all parameters and use a log transform if half of the parameters are sufficiently skewed
      # otherwise use standardization
-    def transform(self):
+    def fit_transform(self):
          """ use normalizer transformation to bring all parameters in same scale """
-        scale = Normalizer()  # StandardScaler()
-        data = scale.fit_transform(self.data)
-        self.data = pd.DataFrame(data, columns=self.data.columns)
-        joblib.dump(scale, 'scale')
+        scale = Normalizer().fit(self.data)
+        joblib.dump(scale, 'src/scale')
+
+    def transform(self):
+        scale = joblib.load('src/scale')
+        self.data = pd.DataFrame(scale.transform(self.data), columns=self.data.columns)
+
+    def save_cols(self):
+        joblib.dump(self.data.columns, 'src/num_params')
  
      def process(self):
          """
            Calls the modules for data preprocessing, such as dropping columns, normalization, etc.
          """
-        temp = ['du-id', 'measTimeStampRf', 'ue-id', 'nrCellIdentity', 'targetTput', 'x', 'y']
+        temp = []
          for col in self.data.columns:
-            if 'nb' in col:
+            if 'nb' in col or 'Geo' in col or 'anomal' in col or 'target' in col:
                  temp.append(col)
-
-        if set(temp).issubset(self.data.columns):
-            self.temp = self.data[temp]
-            self.data = self.data.drop(temp, axis=1)
+        self.data = self.data.drop(temp, axis=1)
          self.numerical_data()
          self.drop_na()
          self.variation()
          self.correlation()
+        self.fit_transform()
          self.transform()
-        return self.data
+        self.save_cols()