From ff5472f26ca5506052ea5c67aeb1673abf7e844e Mon Sep 17 00:00:00 2001 From: ashishj1729 Date: Mon, 6 Jan 2025 15:20:11 +0530 Subject: [PATCH] Updating docs for Model-deployment Change-Id: Ib6f86a257d1041464228575c496db92208cbaa9e Signed-off-by: ashishj1729 --- .../model-deployment/myapplication-image-build.sh | 26 +++++ demos/model-deployment/myapplication/Dockerfile | 27 +++++ demos/model-deployment/myapplication/app/main.py | 75 ++++++++++++ .../myapplication/app/requirements.txt | 20 ++++ .../model-deployment/myapplication/deployment.yaml | 49 ++++++++ demos/model-deployment/qoe.yaml | 29 +++++ docs/installation-guide.rst | 126 ++++++++++----------- 7 files changed, 287 insertions(+), 65 deletions(-) create mode 100644 demos/model-deployment/myapplication-image-build.sh create mode 100644 demos/model-deployment/myapplication/Dockerfile create mode 100644 demos/model-deployment/myapplication/app/main.py create mode 100644 demos/model-deployment/myapplication/app/requirements.txt create mode 100644 demos/model-deployment/myapplication/deployment.yaml create mode 100644 demos/model-deployment/qoe.yaml diff --git a/demos/model-deployment/myapplication-image-build.sh b/demos/model-deployment/myapplication-image-build.sh new file mode 100644 index 0000000..2acdac6 --- /dev/null +++ b/demos/model-deployment/myapplication-image-build.sh @@ -0,0 +1,26 @@ +# ================================================================================== +# +# Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +# ================================================================================== + +# sudo nerdctl --namespace k8s.io rmi -f myapplication:dev +echo "Building New Image & Loading" +sudo buildctl --addr=nerdctl-container://buildkitd build \ + --frontend dockerfile.v0 \ + --opt filename=Dockerfile \ + --local dockerfile=myapplication \ + --local context=myapplication \ + --output type=oci,name=myapplication:dev | sudo nerdctl load --namespace k8s.io \ No newline at end of file diff --git a/demos/model-deployment/myapplication/Dockerfile b/demos/model-deployment/myapplication/Dockerfile new file mode 100644 index 0000000..611c9e7 --- /dev/null +++ b/demos/model-deployment/myapplication/Dockerfile @@ -0,0 +1,27 @@ +# ================================================================================== +# +# Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# ================================================================================== +FROM python:3.10-slim +# Set environment variables to avoid user prompts during package installation +ENV DEBIAN_FRONTEND=noninteractive +# Install necessary packages and clean up +RUN apt-get update && apt-get install -y bash && apt-get clean && rm -rf /var/lib/apt/lists/* +COPY app app +# Location in the container +WORKDIR /app +RUN pip3 install -r requirements.txt +# CMD ["tail", "-f", "/dev/null"] \ No newline at end of file diff --git a/demos/model-deployment/myapplication/app/main.py b/demos/model-deployment/myapplication/app/main.py new file mode 100644 index 0000000..3c9c9ca --- /dev/null +++ b/demos/model-deployment/myapplication/app/main.py @@ -0,0 +1,75 @@ +# ================================================================================== +# +# Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#
+# ==================================================================================
+"""Demo client that polls a KServe InferenceService for predictions."""
+import os
+import time
+
+import requests
+import simplejson as json
+
+KSERVE_HOST = os.environ['KSERVE_HOST']
+MODEL_NAME = os.environ['MODEL_NAME']
+PREDICTION_URL = f"{KSERVE_HOST}/v1/models/{MODEL_NAME}:predict"
+# Seconds to wait for a response; without it requests.post can block forever.
+REQUEST_TIMEOUT = 30
+# Seconds between consecutive prediction requests, including retries.
+POLL_INTERVAL = 5
+
+
+def predict_single_at_time(model_input: list):
+    """Send one data point to the ``:predict`` endpoint and return its prediction.
+
+    model_input is sent as the only element of ``instances``. Returns the first
+    entry of ``predictions``, or -1 when the HTTP status is not 200; network
+    errors and timeouts propagate as ``requests`` exceptions.
+    """
+    payload = {
+        "signature_name": "serving_default",
+        "instances": [model_input],
+    }
+    response = requests.post(
+        PREDICTION_URL,
+        headers={"Content-Type": "application/json"},
+        json=payload,
+        timeout=REQUEST_TIMEOUT,
+    )
+    if response.status_code != 200:
+        print("Error| Status-code is not 200| ", response.text)
+        return -1
+    # Only one instance was sent, so only the first prediction is meaningful.
+    return json.loads(response.text)['predictions'][0]
+
+
+def make_requests():
+    """Poll the model forever with a fixed 10x2 sample input."""
+    data = [[2.56, 2.56] for _ in range(10)]
+    print("Input-data : ", data)
+    while True:
+        try:
+            predicted = predict_single_at_time(data)
+            print(f"Predicted-Values : {predicted}")
+            print("--------------------------------------------------------")
+        except Exception as err:
+            # Deliberately broad: the demo keeps polling whatever goes wrong.
+            print("Recieved Error while make prediction requests | Error : ", err)
+        # Wait before the next request, whether it succeeded or failed.
+        time.sleep(POLL_INTERVAL)
+
+
+if __name__ == '__main__':
+    make_requests()
diff --git a/demos/model-deployment/myapplication/app/requirements.txt b/demos/model-deployment/myapplication/app/requirements.txt
new file mode 100644
index 0000000..468133d
--- /dev/null
+++ b/demos/model-deployment/myapplication/app/requirements.txt
@@ -0,0 +1,20 @@
+# ==================================================================================
+#
+# Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved.
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# ================================================================================== + +simplejson +requests \ No newline at end of file diff --git a/demos/model-deployment/myapplication/deployment.yaml b/demos/model-deployment/myapplication/deployment.yaml new file mode 100644 index 0000000..eeb1f12 --- /dev/null +++ b/demos/model-deployment/myapplication/deployment.yaml @@ -0,0 +1,49 @@ +# ================================================================================== +# +# Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# ================================================================================== +apiVersion: apps/v1 +kind: Deployment +metadata: + name: myapplication + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: myapplication + template: + metadata: + labels: + app: myapplication + spec: + containers: + - name: myapplication-container + image: myapplication:dev + imagePullPolicy: IfNotPresent + command: ["python3", "-u","main.py"] + resources: + requests: + memory: "128Mi" + cpu: "250m" + limits: + memory: "256Mi" + cpu: "500m" + env: + - name: KSERVE_HOST + value: "http://qoe-model.kserve-test.svc.cluster.local" + - name: MODEL_NAME + value: "qoe-model" \ No newline at end of file diff --git a/demos/model-deployment/qoe.yaml b/demos/model-deployment/qoe.yaml new file mode 100644 index 0000000..23d4734 --- /dev/null +++ b/demos/model-deployment/qoe.yaml @@ -0,0 +1,29 @@ +# ================================================================================== +# +# Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# ================================================================================== +apiVersion: "serving.kserve.io/v1beta1" +kind: "InferenceService" +metadata: + name: "qoe-model" + namespace: "kserve-test" +spec: + predictor: + model: + modelFormat: + name: tensorflow + storageUri: "" + \ No newline at end of file diff --git a/docs/installation-guide.rst b/docs/installation-guide.rst index d76ec51..f899135 100755 --- a/docs/installation-guide.rst +++ b/docs/installation-guide.rst @@ -544,45 +544,39 @@ NOTE: Below are some example values to be used for the QoE usecase training job Obtain Model URL for deploying trained models --------------------------------------------- -URL for deployment can be obainted from AIMFW dashboard (Training Jobs-> Training Job status -> Select Info for a training job -> Model URL) -In case of using AIMLFW Model management service, URL for downloading and deploying model using Model Management Service will be the following: - .. code:: bash - http://:32006/downloadModel//model.zip + http://:32002/model////Model.zip + + +.. _reference4: -Install only Kserve for deploying models +Model-Deployment ---------------------------------------- -To install Kserve run the below commands +1. Installing Kserve .. code:: bash ./bin/install_kserve.sh - -Uninstall only Kserve ---------------------- - -To uninstall Kserve run the below commands +2. Verify Installation .. code:: bash - ./bin/uninstall_kserve.sh + ~$ kubectl get pods -n kserve + NAME READY STATUS RESTARTS AGE + kserve-controller-manager-5d995bd58-9pf6x 2/2 Running 0 6d18h - -.. _reference4: - -Deploy trained qoe prediction model on Kserve ---------------------------------------------- - -Create namespace using command below +3. Deploy trained qoe prediction model on Kserve .. code:: bash + # Create namespace kubectl create namespace kserve-test + Create :file:`qoe.yaml` file with below contents .. 
code-block:: yaml @@ -590,19 +584,14 @@ Create :file:`qoe.yaml` file with below contents apiVersion: "serving.kserve.io/v1beta1" kind: "InferenceService" metadata: - name: qoe-model + name: "qoe-model" + namespace: kserve-test spec: predictor: - tensorflow: - storageUri: "" - runtimeVersion: "2.5.1" - resources: - requests: - cpu: 0.1 - memory: 0.5Gi - limits: - cpu: 0.1 - memory: 0.5Gi + model: + modelFormat: + name: tensorflow + storageUri: "" To deploy model update the Model URL in the :file:`qoe.yaml` file and execute below command to deploy model @@ -610,63 +599,70 @@ Refer :ref:`Obtain Model URL for deploying trained models ` .. code:: bash - kubectl apply -f qoe.yaml -n kserve-test - -Check running state of pod using below command - -.. code:: bash + kubectl apply -f qoe.yaml - kubectl get pods -n kserve-test + +Verify Model-Deployment -Test predictions using model deployed on Kserve ------------------------------------------------ +.. code:: bash -Use below command to obtain Ingress port for Kserve. + ~$ kubectl get InferenceService -n kserve-test -.. code:: bash + NAME URL READY PREV LATEST PREVROLLEDOUTREVISION LATESTREADYREVISION AGE + qoe-model http://qoe-model.kserve-test.svc.cluster.local True 100 qoe-model-predictor-00001 42s - kubectl get svc istio-ingressgateway -n istio-system -Obtain nodeport corresponding to port 80. -In the below example, the port is 31206. + ~$ kubectl get pods -n kserve-test -.. code:: + NAME READY STATUS RESTARTS AGE + qoe-model-predictor-00001-deployment-86d9db6cb-5r8st 2/2 Running 0 93s - NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE - istio-ingressgateway LoadBalancer 10.105.222.242 15021:31423/TCP,80:31206/TCP,443:32145/TCP,31400:32338/TCP,15443:31846/TCP 4h15m +4. Test predictions using model deployed on Kserve -Create predict.sh file with following contents +In order to test our deployed-model, we will query the InferenceService from a curl-pod. .. 
code:: bash - model_name=qoe-model - curl -v -H "Host: $model_name.kserve-test.example.com" http://:/v1/models/$model_name:predict -d @./input_qoe.json + # Deploy a curl-pod + kubectl run curl-pod --image=curlimages/curl:latest --command sleep 3600 + # Query Inference-Service + kubectl exec -it curl-pod -- \ + curl \ + --location http://qoe-model.kserve-test.svc.cluster.local/v1/models/qoe-model:predict \ + --header "Content-Type: application/json" \ + --data '{ + "signature_name": "serving_default", + "instances": [[ + [2.56, 2.56], + [2.56, 2.56], + [2.56, 2.56], + [2.56, 2.56], + [2.56, 2.56], + [2.56, 2.56], + [2.56, 2.56], + [2.56, 2.56], + [2.56, 2.56], + [2.56, 2.56]] + ] + }' -Update the ``IP`` of host where Kserve is deployed and ingress port of Kserve obtained using above method. +| Note: We can change which deployed-model to query by changing the location as: +| location = /v1/models/:predict, where +| a. MODEL_NAME: Refers to the Name of Inference-Service +| b. KSERVE_HOST: Refers to the URL of Inference-Service -Create sample data for predictions in file :file:`input_qoe.json`. Add the following content in :file:`input_qoe.json` file. -.. code:: bash - {"signature_name": "serving_default", "instances": [[[2.56, 2.56], - [2.56, 2.56], - [2.56, 2.56], - [2.56, 2.56], - [2.56, 2.56], - [2.56, 2.56], - [2.56, 2.56], - [2.56, 2.56], - [2.56, 2.56], - [2.56, 2.56]]]} +5. Uninstall Kserve +.. code:: bash -Use command below to trigger predictions + ./bin/uninstall_kserve.sh -.. code:: bash - source predict.sh +For Advanced usecases, Please refer to official kserve-documentation `here `__ Install both Kserve and Kserve adapter for deploying models -- 2.16.6