From dcbaadd22400c8ab6dcd823f8737d9324caa8ecc Mon Sep 17 00:00:00 2001 From: rajdeep11 Date: Wed, 7 Jun 2023 16:55:58 +0530 Subject: [PATCH] solving sonar vulnerabilities Issue-Id: AIMLFW-22 Change-Id: Icdfbd65b907945471967422f9ecc5881bdb116d6 Signed-off-by: rajdeep11 --- tests/test_tm_apis.py | 81 +++++++++++++++++--- trainingmgr/common/trainingmgr_util.py | 14 +++- trainingmgr/trainingmgr_main.py | 135 ++++++++++++++++++--------------- 3 files changed, 156 insertions(+), 74 deletions(-) diff --git a/tests/test_tm_apis.py b/tests/test_tm_apis.py index 82eb7bb..f9b0983 100644 --- a/tests/test_tm_apis.py +++ b/tests/test_tm_apis.py @@ -59,7 +59,7 @@ class Test_upload_pipeline: trainingmgr_main.LOGGER.debug(response.data) assert response.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR - assert expected_data in response.json.keys() + assert expected_data in response.json.keys() class Test_data_extraction_notification: @@ -187,7 +187,7 @@ class Test_pipeline_notification: def test_get_steps_state_2(self,mock1): trainingmgr_main.LOGGER.debug("******* test_get_steps_state get *******") expected_data = "test_data1" - response = self.client.get("/trainingjobs///steps_state".format("usecase1"), + response = self.client.get("/trainingjobs/{trainingjobname}/{version}/steps_state".format(trainingjobname="usecase1", version="1"), content_type="application/json") trainingmgr_main.LOGGER.debug(response.data) assert response.status_code == status.HTTP_200_OK, "Return status code NOT equal" @@ -197,7 +197,7 @@ class Test_pipeline_notification: @patch('trainingmgr.trainingmgr_main.get_field_of_given_version', return_value = db_result5) def test_negative_get_steps_state_2(self,mock1): expected_data = "Exception" - response = self.client.get("/trainingjobs///steps_state".format("usecase1"), + response = self.client.get("/trainingjobs/{trainingjobname}/{version}/steps_state".format(trainingjobname="usecase1", version="1"), content_type="application/json") trainingmgr_main.LOGGER.debug(response.data) assert response.status_code == status.HTTP_404_NOT_FOUND, "Return status code NOT equal" @@ -234,6 +234,19 @@ class Test_get_trainingjob_by_name_version: trainingmgr_main.LOGGER.debug(response.data) assert response.content_type == "application/json", "not equal content type" assert response.status_code == 404, "not equal code" + + def test_negative_get_trainingjob_by_name_version2(self): + usecase_name = "usecase7*" + version = "1" + response = self.client.get("/trainingjobs/{}/{}".format(usecase_name, version)) + print(response.data) + assert response.status_code == status.HTTP_400_BAD_REQUEST, "not equal status code" + assert response.data == b'{"Exception":"The trainingjob_name or version is not correct"}\n' + usecase_name="usecase7" + version="a" + response = self.client.get("/trainingjobs/{}/{}".format(usecase_name, version)) + assert response.status_code == status.HTTP_400_BAD_REQUEST, "not equal status code" + assert response.data == b'{"Exception":"The trainingjob_name or version is not correct"}\n' class Test_unpload_pipeline: def setup_method(self): @@ -286,6 +299,18 @@ class Test_get_steps_state: response = self.client.get("/trainingjobs/{}/{}/steps_state".format(usecase_name, version)) expected_data = b'data_extracted' assert response.status_code == 500, "not equal code" + + def test_negative_get_steps_state_by_name_and_version(self): + usecase_name = "usecase7*" + version = "1" + response = self.client.get("/trainingjobs/{}/{}/steps_state".format(usecase_name, version)) + assert response.status_code == status.HTTP_400_BAD_REQUEST, "not equal status code" + assert response.data == b'{"Exception":"The trainingjob_name or version is not correct"}\n' + usecase_name="usecase7" + version="a" + response = self.client.get("/trainingjobs/{}/{}/steps_state".format(usecase_name, version)) + assert response.status_code == status.HTTP_400_BAD_REQUEST, "not equal status code" + assert response.data == b'{"Exception":"The trainingjob_name or version is not correct"}\n' class Test_training_main: def setup_method(self): @@ -314,7 +339,7 @@ class Test_training_main: "bucket":"UEData" } expected_data = b'{"result": "Information stored in database."}' - response = self.client.post("/trainingjobs/".format("usecase1"), + response = self.client.post("/trainingjobs/{}".format("usecase1"), data=json.dumps(trainingjob_req), content_type="application/json") trainingmgr_main.LOGGER.debug(response.data) @@ -352,7 +377,7 @@ class Test_training_main: } expected_data = 'Information updated in database' - response = self.client.put("/trainingjobs/".format("usecase1"), + response = self.client.put("/trainingjobs/{}".format("usecase1"), data=json.dumps(trainingjob_req), content_type="application/json") trainingmgr_main.LOGGER.debug(response.data) @@ -380,7 +405,7 @@ class Test_training_main: "bucket":"UEData" } expected_data = 'is already present in database' - response = self.client.post("/trainingjobs/".format("usecase1"), + response = self.client.post("/trainingjobs/{}".format("usecase1"), data=json.dumps(trainingjob_req), content_type="application/json") trainingmgr_main.LOGGER.debug(response.data) @@ -408,7 +433,7 @@ class Test_training_main: def test_training(self,mock1,mock2,mock3,mock4): trainingmgr_main.LOGGER.debug("******* test_trainingjob_operations post *******") expected_data = 'Data Pipeline Execution Completed"' - response = self.client.post("/trainingjobs//training".format("usecase1"), + response = self.client.post("/trainingjobs/{}/training".format("usecase1"), content_type="application/json") trainingmgr_main.LOGGER.debug(response.data) assert response.status_code == status.HTTP_200_OK, "Return status code NOT equal" @@ -433,11 +458,21 @@ class Test_training_main: def test_training_negative_de_failed(self,mock1,mock2,mock3,mock4): trainingmgr_main.LOGGER.debug("******* test_trainingjob_operations post *******") expected_data = 'Data Pipeline Execution Failed' - response = self.client.post("/trainingjobs//training".format("usecase1"), + response = self.client.post("/trainingjobs/{}/training".format("usecase1"), content_type="application/json") trainingmgr_main.LOGGER.debug(response.data) assert response.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR, "Return status code NOT equal" assert expected_data in str(response.data) + + def test_negative_training_by_trainingjob_name(self): + trainingjob_name="usecase*" + response=self.client.post('/trainingjobs/{}'.format(trainingjob_name), content_type="application/json") + assert response.status_code==status.HTTP_400_BAD_REQUEST + assert response.data == b'{"Exception":"The trainingjob_name is not correct"}\n' + response=self.client.post('/trainingjobs/{}/training'.format(trainingjob_name), content_type="application/json") + assert response.status_code==status.HTTP_400_BAD_REQUEST + assert response.data == b'{"Exception":"The trainingjob_name is not correct"}\n' + class Test_get_versions_for_pipeline: @patch('trainingmgr.common.trainingmgr_config.TMLogger', return_value = TMLogger("tests/common/conf_log.yaml")) @@ -620,6 +655,13 @@ class Test_get_metadata: assert response.content_type == "application/json", "not equal content type" assert response.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR, "Should have thrown the exception " + def test_negative_get_metadata_by_name(self): + trainingjob_name="usecase*" + response=self.client.get('/trainingjobs/metadata/{}'.format(trainingjob_name), content_type="application/json") + print(response.data) + assert response.status_code==status.HTTP_400_BAD_REQUEST + assert response.data == b'{"Exception":"The trainingjob_name is not correct"}\n' + class Test_get_model: def setup_method(self): self.client = trainingmgr_main.APP.test_client(self) @@ -629,10 +671,23 @@ class Test_get_metadata: @patch('trainingmgr.trainingmgr_main.send_file', return_value = 'File') def test_negative_get_model(self,mock1): trainingjob_name = "usecase777" - version = 2 + version = "2" result = 'File' response = trainingmgr_main.get_model(trainingjob_name,version) assert response[1] == 500, "The function get_model Failed" + + def test_negative_get_model_by_name_or_version(self): + usecase_name = "usecase7*" + version = "1" + response = self.client.get("/model/{}/{}/Model.zip".format(usecase_name, version)) + assert response.status_code == status.HTTP_400_BAD_REQUEST, "not equal status code" + assert response.data == b'{"Exception":"The trainingjob_name or version is not correct"}\n' + usecase_name="usecase7" + version="a" + response = self.client.get("/model/{}/{}/Model.zip".format(usecase_name, version)) + assert response.status_code == status.HTTP_400_BAD_REQUEST, "not equal status code" + assert response.data == b'{"Exception":"The trainingjob_name or version is not correct"}\n' + class Test_get_metadata_1: def setup_method(self): @@ -674,7 +729,7 @@ class Test_get_metadata_1: def test_training_negative_de_notfound(self,mock1): trainingmgr_main.LOGGER.debug("******* test_training_404_NotFound *******") expected_data = '' - response = self.client.post("/trainingjobs//training".format("usecase1"), + response = self.client.post("/trainingjobs/{}/training".format("usecase1"), content_type="application/json") trainingmgr_main.LOGGER.debug(response.data) assert response.status_code == status.HTTP_404_NOT_FOUND, "Return status code NOT equal" @@ -983,6 +1038,12 @@ class Test_get_feature_group_by_name: response=self.client.get('/featureGroup/{}'.format(fg_name)) assert response.status_code == 500 , "status code is not equal" assert response.data == expected_data + + def test_negative_get_feature_group_name_for_incorrect_name(self): + featuregroup_name="usecase*" + response=self.client.get('/featureGroup/'.format(featuregroup_name), content_type="application/json") + assert response.status_code==status.HTTP_400_BAD_REQUEST + assert response.data == b'{"Exception":"The trainingjob_name is not correct"}\n' class Test_delete_list_of_feature_group: @patch('trainingmgr.common.trainingmgr_config.TMLogger', return_value = TMLogger("tests/common/conf_log.yaml")) diff --git a/trainingmgr/common/trainingmgr_util.py b/trainingmgr/common/trainingmgr_util.py index 8c46171..b8b31cf 100644 --- a/trainingmgr/common/trainingmgr_util.py +++ b/trainingmgr/common/trainingmgr_util.py @@ -31,6 +31,7 @@ from trainingmgr.common.exceptions_utls import APIException,TMException,DBExcept ERROR_TYPE_KF_ADAPTER_JSON = "Kf adapter doesn't sends json type response" MIMETYPE_JSON = "application/json" +PATTERN = re.compile(r"\w+") def response_for_training(code, message, logger, is_success, trainingjob_name, ps_db_obj, mm_sdk): """ @@ -244,8 +245,7 @@ def validate_trainingjob_name(trainingjob_name, ps_db_obj): """ results = None isavailable = False - pattern = re.compile(r"[a-zA-Z0-9_]+") - if (not re.fullmatch(pattern, trainingjob_name) or + if (not re.fullmatch(PATTERN, trainingjob_name) or len(trainingjob_name) < 3 or len(trainingjob_name) > 63): raise TMException("The name of training job is invalid.") @@ -280,3 +280,13 @@ def get_all_pipeline_names_svc(training_config_obj): logger.error(str(err)) logger.debug(pipeline_names) return pipeline_names + +def check_trainingjob_name_and_version(trainingjob_name, version): + if (re.fullmatch(PATTERN, trainingjob_name) and version.isnumeric()): + return True + return False + +def check_trainingjob_name_or_featuregroup_name(name): + if re.fullmatch(PATTERN, name): + return True + return False diff --git a/trainingmgr/trainingmgr_main.py b/trainingmgr/trainingmgr_main.py index ffb338f..4215727 100644 --- a/trainingmgr/trainingmgr_main.py +++ b/trainingmgr/trainingmgr_main.py @@ -39,7 +39,7 @@ from trainingmgr.common.trainingmgr_util import get_one_word_status, check_train check_key_in_dictionary, get_one_key, \ response_for_training, get_metrics, \ handle_async_feature_engineering_status_exception_case, \ - validate_trainingjob_name, get_all_pipeline_names_svc, check_feature_group_data + validate_trainingjob_name, get_all_pipeline_names_svc, check_feature_group_data, check_trainingjob_name_and_version, check_trainingjob_name_or_featuregroup_name from trainingmgr.common.exceptions_utls import APIException,TMException from trainingmgr.constants.steps import Steps from trainingmgr.constants.states import States @@ -141,8 +141,13 @@ def get_trainingjob_by_name_version(trainingjob_name, version): all exception are provided with exception message and HTTP status code. """ + response_code = status.HTTP_500_INTERNAL_SERVER_ERROR + response_data = {} + if not check_trainingjob_name_and_version(trainingjob_name, version): + return {"Exception":"The trainingjob_name or version is not correct"}, status.HTTP_400_BAD_REQUEST + LOGGER.debug("Request to fetch trainingjob by name and version(trainingjob:" + \ - trainingjob_name + " ,version:" + version + ")") + trainingjob_name + " ,version:" + version + ")") response_code = status.HTTP_500_INTERNAL_SERVER_ERROR response_data = {} try: @@ -230,26 +235,28 @@ def get_steps_state(trainingjob_name, version): Exceptions: all exception are provided with exception message and HTTP status code. """ - LOGGER.debug("Request to get steps_state for (trainingjob:" + \ - trainingjob_name + " and version: " + version + ")") - reponse_data = {} response_code = status.HTTP_500_INTERNAL_SERVER_ERROR + response_data = {} + if not check_trainingjob_name_and_version(trainingjob_name, version): + return {"Exception":"The trainingjob_name or version is not correct"}, status.HTTP_400_BAD_REQUEST + LOGGER.debug("Request to get steps_state for (trainingjob:" + \ + trainingjob_name + " and version: " + version + ")") try: results = get_field_of_given_version(trainingjob_name, version, PS_DB_OBJ, "steps_state") LOGGER.debug("get_field_of_given_version:" + str(results)) if results: - reponse_data = results[0][0] + response_data = results[0][0] response_code = status.HTTP_200_OK else: - + response_code = status.HTTP_404_NOT_FOUND raise TMException("Not found given trainingjob in database") except Exception as err: LOGGER.error(str(err)) - reponse_data = {"Exception": str(err)} + response_data = {"Exception": str(err)} - return APP.response_class(response=reponse_data, + return APP.response_class(response=response_data, status=response_code, mimetype=MIMETYPE_JSON) @@ -273,6 +280,9 @@ def get_model(trainingjob_name, version): Exceptions: all exception are provided with exception message and HTTP status code. """ + if not check_trainingjob_name_and_version(trainingjob_name, version): + return {"Exception":"The trainingjob_name or version is not correct"}, status.HTTP_400_BAD_REQUEST + try: return send_file(MM_SDK.get_model_zip(trainingjob_name, version), mimetype='application/zip') except Exception: @@ -305,16 +315,17 @@ def training(trainingjob_name): Exceptions: all exception are provided with exception message and HTTP status code. """ - - LOGGER.debug("Request for training trainingjob %s ", trainingjob_name) - response_data = {} response_code = status.HTTP_500_INTERNAL_SERVER_ERROR + response_data = {} + if not check_trainingjob_name_or_featuregroup_name(trainingjob_name): + return {"Exception":"The trainingjob_name is not correct"}, status.HTTP_400_BAD_REQUEST + LOGGER.debug("Request for training trainingjob %s ", trainingjob_name) try: isDataAvaible = validate_trainingjob_name(trainingjob_name, PS_DB_OBJ) if not isDataAvaible: response_code = status.HTTP_404_NOT_FOUND raise TMException("Given trainingjob name is not present in database" + \ - "(trainingjob: " + trainingjob_name + ")") from None + "(trainingjob: " + trainingjob_name + ")") from None else: db_results = get_trainingjob_info_by_name(trainingjob_name, PS_DB_OBJ) @@ -326,8 +337,8 @@ def training(trainingjob_name): LOGGER.debug('Starting Data Extraction...') de_response = data_extraction_start(TRAININGMGR_CONFIG_OBJ, trainingjob_name, - feature_list, query_filter, datalake_source, - _measurement, bucket) + feature_list, query_filter, datalake_source, + _measurement, bucket) if (de_response.status_code == status.HTTP_200_OK ): LOGGER.debug("Response from data extraction for " + \ trainingjob_name + " : " + json.dumps(de_response.json())) @@ -349,7 +360,7 @@ def training(trainingjob_name): raise TMException(errMsg) else: raise TMException("Data extraction doesn't send json type response" + \ - "(trainingjob name is " + trainingjob_name + ")") from None + "(trainingjob name is " + trainingjob_name + ")") from None except Exception as err: response_data = {"Exception": str(err)} LOGGER.debug("Error is training, job name:" + trainingjob_name + str(err)) @@ -386,10 +397,9 @@ def data_extraction_notification(): try: if not check_key_in_dictionary(["trainingjob_name"], request.json) : err_msg = "Trainingjob_name key not available in request" - Logger.error(err_msg) - err_response_code = status.HTTP_400_BAD_REQUEST - raise TMException(err_msg) - + LOGGER.error(err_msg) + return {"Exception":err_msg}, status.HTTP_400_BAD_REQUEST + trainingjob_name = request.json["trainingjob_name"] results = get_trainingjob_info_by_name(trainingjob_name, PS_DB_OBJ) arguments = json.loads(results[0][5])['arguments'] @@ -436,6 +446,7 @@ def data_extraction_notification(): return response_for_training(err_response_code, err_msg + str(err) + "(trainingjob name is " + trainingjob_name + ")", LOGGER, False, trainingjob_name, PS_DB_OBJ, MM_SDK) + except Exception as err: LOGGER.error("Failed to handle dataExtractionNotification. " + str(err)) if not change_in_progress_to_failed_by_latest_version(trainingjob_name, PS_DB_OBJ) : @@ -616,8 +627,8 @@ def upload_pipeline(pipe_name): else: result_string = "Didn't get file" raise ValueError("file not found in request.files") - pattern = re.compile(r"[a-zA-Z0-9_]+") - if not re.fullmatch(pattern, pipe_name): + + if not check_trainingjob_name_or_featuregroup_name(pipe_name): err_msg="the pipeline name is not valid" raise TMException(err_msg) LOGGER.debug("Uploading received for %s", uploaded_file.filename) @@ -869,8 +880,11 @@ def trainingjob_operations(trainingjob_name): Exceptions: All exception are provided with exception message and HTTP status code. """ - api_response = {} response_code = status.HTTP_500_INTERNAL_SERVER_ERROR + api_response = {} + if not check_trainingjob_name_or_featuregroup_name(trainingjob_name): + return {"Exception":"The trainingjob_name is not correct"}, status.HTTP_400_BAD_REQUEST + LOGGER.debug("Training job create/update request(trainingjob name %s) ", trainingjob_name ) try: json_data = request.json @@ -1213,18 +1227,19 @@ def get_metadata(trainingjob_name): Exceptions: all exception are provided with exception message and HTTP status code. """ + response_code = status.HTTP_500_INTERNAL_SERVER_ERROR + api_response = {} + if not check_trainingjob_name_or_featuregroup_name(trainingjob_name): + return {"Exception":"The trainingjob_name is not correct"}, status.HTTP_400_BAD_REQUEST LOGGER.debug("Request metadata for trainingjob(name of trainingjob is %s) ", trainingjob_name) - api_response = {} - response_code = status.HTTP_500_INTERNAL_SERVER_ERROR try: results = get_all_versions_info_by_name(trainingjob_name, PS_DB_OBJ) if results: info_list = [] for trainingjob_info in results: if (get_one_word_status(json.loads(trainingjob_info[9])) == States.FINISHED.name and - not trainingjob_info[19]): - + not trainingjob_info[19]): LOGGER.debug("Downloading metric for " +trainingjob_name ) data = get_metrics(trainingjob_name, trainingjob_info[11], MM_SDK) url = "http://" + str(TRAININGMGR_CONFIG_OBJ.my_ip) + ":" + \ @@ -1309,8 +1324,7 @@ def create_feature_group(): # check the data conformance LOGGER.debug("the db info is : ", get_feature_group_by_name_db(PS_DB_OBJ, feature_group_name)) - pattern = re.compile(r"[a-zA-Z0-9_]+") - if (not re.fullmatch(pattern, feature_group_name) or + if (not check_trainingjob_name_or_featuregroup_name(feature_group_name) or len(feature_group_name) < 3 or len(feature_group_name) > 63 or get_feature_group_by_name_db(PS_DB_OBJ, feature_group_name)): api_response = {"Exception": "Failed to create the feature group since feature group not valid or already present"} @@ -1431,39 +1445,36 @@ def get_feature_group_by_name(featuregroup_name): """ api_response={} response_code=status.HTTP_500_INTERNAL_SERVER_ERROR - pattern = re.compile(r"[a-zA-Z0-9_]+") - if not re.fullmatch(pattern, featuregroup_name): - api_response={"Exception": "Invalid featuregroup_name"} - response_code=status.HTTP_400_BAD_REQUEST - else: - LOGGER.debug("Request for getting a feature group with name = "+ featuregroup_name) - try: - result= get_feature_group_by_name_db(PS_DB_OBJ, featuregroup_name) - feature_group=[] - if result: - for res in result: - features=res[1].split(",") - dict_data={ - "featuregroup_name": res[0], - "features": features, - "datalake": res[2], - "dme": res[3], - "dme_host": res[4], - "dme_port": res[5], - "bucket":res[6], - "token":res[7], - "source_name":res[8], - "db_org":res[9] - } - feature_group.append(dict_data) - api_response={"featuregroup":feature_group} - response_code=status.HTTP_200_OK - else: - response_code=status.HTTP_404_NOT_FOUND - raise TMException("Failed to fetch feature group info from db") - except Exception as err: - api_response = {"Exception": str(err)} - LOGGER.error(str(err)) + if not check_trainingjob_name_or_featuregroup_name(featuregroup_name): + return {"Exception":"The trainingjob_name is not correct"}, status.HTTP_400_BAD_REQUEST + LOGGER.debug("Request for getting a feature group with name = "+ featuregroup_name) + try: + result= get_feature_group_by_name_db(PS_DB_OBJ, featuregroup_name) + feature_group=[] + if result: + for res in result: + features=res[1].split(",") + dict_data={ + "featuregroup_name": res[0], + "features": features, + "datalake": res[2], + "dme": res[3], + "dme_host": res[4], + "dme_port": res[5], + "bucket":res[6], + "token":res[7], + "source_name":res[8], + "db_org":res[9] + } + feature_group.append(dict_data) + api_response={"featuregroup":feature_group} + response_code=status.HTTP_200_OK + else: + response_code=status.HTTP_404_NOT_FOUND + raise TMException("Failed to fetch feature group info from db") + except Exception as err: + api_response = {"Exception": str(err)} + LOGGER.error(str(err)) return APP.response_class(response=json.dumps(api_response), status=response_code, mimetype=MIMETYPE_JSON) -- 2.16.6