From 9506d855598158b2ec73ce09d26dcb298b6e17ee Mon Sep 17 00:00:00 2001 From: tganesh2k Date: Wed, 12 Oct 2022 16:57:22 +0530 Subject: [PATCH] Adding files for transform functions of data-extraction module Issue-Id: AIMLFW-2 Signed-off-by: tganesh2k Change-Id: I36045b5e23ab915fcff69eee75526bb7e6a37630 --- src/transform/DefaultSparkTransform.py | 43 ++++++++++++++++++++ src/transform/SQLTransform.py | 71 ++++++++++++++++++++++++++++++++++ src/transform/TransformClassConfig.ini | 11 ++++++ src/transform/__init__.py | 0 4 files changed, 125 insertions(+) create mode 100644 src/transform/DefaultSparkTransform.py create mode 100644 src/transform/SQLTransform.py create mode 100644 src/transform/TransformClassConfig.ini create mode 100644 src/transform/__init__.py diff --git a/src/transform/DefaultSparkTransform.py b/src/transform/DefaultSparkTransform.py new file mode 100644 index 0000000..0d0c927 --- /dev/null +++ b/src/transform/DefaultSparkTransform.py @@ -0,0 +1,43 @@ +# ================================================================================== +# +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
class DefaultSparkTransform(Transform):
    """
    @Class: Default Spark Transform
    @BaseClass: Transform

    Placeholder transformer: it only records its flavour and class type.
    Both ``init`` and ``transform`` are stubs that do no work yet.
    """

    def __init__(self, classflavour):
        """
        @Method: constructor
        @Input: classflavour, kept unchanged on the instance as ``flavour``
        """
        self.flavour = classflavour
        self.ClassType = "Default"

    def init(self, sparkhelper, confighelper, inputdict):
        """
        @Method: init, to be implemented
        @Input: sparkhelper, confighelper, inputdict (none are used yet)
        @Output: always None
        """
        return None

    def transform(self, sparksession, sparkdf):
        """
        @Method: generic transform, to be implemented
        @Input: sparksession, sparkdf (neither is used yet)
        @Output: always None; the input dataframe is NOT passed through
        """
        return None
class SQLTransform(Transform):
    """
    @Class: SQL Transform
    @BaseClass: Transform

    Builds a ``SELECT <features> FROM __THIS__ [WHERE <filter>]`` statement
    from the input dictionary and runs it through pyspark's SQLTransformer.
    """

    def __init__(self, classflavour):
        """
        @Method: constructor
        @Input: classflavour, kept unchanged on the instance as ``flavour``

        ``logger`` and ``sqlstatement`` stay None until ``init`` is called.
        """
        self.logger = None
        self.sqlstatement = None
        self.flavour = classflavour

    def init(self, sparkhelper, confighelper, inputdict):
        """
        @Method: init
        @Input: sparkhelper (unused here), confighelper (supplies the logger
                through ``getLogger()``), inputdict (reads the mandatory
                "FeatureList" key and the optional "SQLFilter" key)
        @Raises: KeyError when "FeatureList" is absent from inputdict
        """
        self.logger = confighelper.getLogger()
        feat_list = self.get_feature_list(inputdict["FeatureList"])
        statement = "SELECT " + feat_list + " FROM __THIS__ "

        # NOTE(review): the filter is spliced into the statement verbatim, so
        # it must come from a trusted source; it cannot be parameterised here.
        sql_filter = inputdict.get("SQLFilter")
        # A missing, None or blank filter must not leave a dangling "WHERE".
        if sql_filter is not None and sql_filter.strip():
            statement = statement + "WHERE " + sql_filter

        self.sqlstatement = statement
        self.logger.debug(" The ML LIB SQL to be executed is " + self.sqlstatement)

    def get_feature_list(self, features_str):
        """
        Wraps every feature name inside `` characters so that names
        containing spaces are handled.

        @Input: features_str, comma separated feature names, "*" or None
        @Output: comma separated, backtick-quoted names; "*" when the input
                 is None, blank or "*" (all columns are selected)

        Whitespace around each name is dropped, empty entries (for example
        from a trailing comma) are skipped, and a backtick inside a name is
        doubled so it cannot terminate the quoted identifier early.
        """
        if features_str is None:
            return "*"

        stripped = (name.strip() for name in features_str.split(","))
        names = [name for name in stripped if name]
        if not names or names == ["*"]:
            return "*"

        return ",".join("`" + name.replace("`", "``") + "`" for name in names)

    def transform(self, sparksession, sparkdf):
        """
        @Method: transform
        @Input: sparksession (unused here), sparkdf (dataframe to transform)
        @Output: dataframe returned by SQLTransformer for the stored statement
        @Raises: RuntimeError when called before ``init`` built the statement
        """
        if self.sqlstatement is None:
            # Fail with a clear message instead of handing None to Spark.
            raise RuntimeError("SQLTransform.init() must be called before transform()")

        sqltrans = SQLTransformer()
        sqltrans.setStatement(self.sqlstatement)
        return sqltrans.transform(sparkdf)
+ModuleName=transform +BaseClassName=Transform +DefaultClassName=DefaultSparkTransform + +[EnvConfig] diff --git a/src/transform/__init__.py b/src/transform/__init__.py new file mode 100644 index 0000000..e69de29 -- 2.16.6