From 297dbd6245ec69571c8ad7091a18cbe9c7ba2488 Mon Sep 17 00:00:00 2001 From: deepanshuk Date: Tue, 3 Nov 2020 13:09:19 +0530 Subject: [PATCH] [Issue-Id: RICAPP-142] Updated the commnents in the programs and add files for jjb Updated README file and comments for the programs(ad_train.py, processing.py, ad_model.py) Added __init__ in ad folder, setup.py and tox file for jjb [Issue-Id: RICAPP-142] Implemented HDBScan for clustering and Random Forest for classification to detect the anomaly Added and updated the below files. main.py: Main program to predict the anomaly for the selected UEID. Send the UEID and timestamp for the anomalous entries to the Traffic Steering (rmr with the message type as 30003) ad_train.py: Train the machine learning algorithm and save the model using the input csv files and save the model. ue_test.csv: Input csv file has 1000 samples and for each UEID has one or more than one entries for poor signal. ue_data: List of UEID specific csv files to train the model [Issue-Id: RICAPP-142] Anomaly detection xApp that integrates with the traffic steering use case Signed-off-by: deepanshuk Change-Id: I13f890244cf5ce27b4d07e617a1f8b26adde4b9f --- .gitreview | 12 +- Dockerfile | 41 + LICENSE.txt | 29 + README.txt | 45 + ad/LabelEncoder | Bin 0 -> 571 bytes ad/__init__.py | 15 + ad/ad_model/__pycache__/ad_model.cpython-38.pyc | Bin 0 -> 1957 bytes ad/ad_model/__pycache__/processing.cpython-38.pyc | Bin 0 -> 2540 bytes ad/ad_model/__pycache__/tb_format.cpython-38.pyc | Bin 0 -> 917 bytes ad/ad_model/ad_model.py | 73 + ad/ad_model/processing.py | 79 + ad/ad_model/tb_format.py | 36 + ad/ad_train.py | 137 + ad/main.py | 73 + ad/scale | Bin 0 -> 715 bytes ad/test_route.rt | 19 + ad/ue_data/12345.csv | 1986 ++++ ad/ue_data/12346.csv | 1994 ++++ ad/ue_data/12347.csv | 1993 ++++ ad/ue_data/12348.csv | 1984 ++++ ad/ue_data/12349.csv | 1987 ++++ ad/ue_data/12350.csv | 1990 ++++ ad/ue_data/12351.csv | 1996 ++++ ad/ue_data/12352.csv | 1992 ++++ ad/ue_data/12353.csv | 1991 ++++ ad/ue_data/12354.csv | 1991 ++++ ad/ue_data/12355.csv | 1990 ++++ ad/ue_data/12356.csv | 1989 ++++ ad/ue_data/12357.csv | 1993 ++++ ad/ue_data/12358.csv | 1993 ++++ ad/ue_data/12359.csv | 1995 ++++ ad/ue_data/12360.csv | 1989 ++++ ad/ue_data/12361.csv | 1992 ++++ ad/ue_data/12362.csv | 1991 ++++ ad/ue_data/12363.csv | 1991 ++++ ad/ue_data/12364.csv | 1990 ++++ ad/ue_data/12365.csv | 1992 ++++ ad/ue_data/12366.csv | 1994 ++++ ad/ue_data/12367.csv | 1991 ++++ ad/ue_data/12368.csv | 1988 ++++ ad/ue_data/12369.csv | 1991 ++++ ad/ue_data/12370.csv | 1992 ++++ ad/ue_data/12371.csv | 1990 ++++ ad/ue_data/12372.csv | 1989 ++++ ad/ue_data/12373.csv | 1984 ++++ ad/ue_data/12374.csv | 10349 ++++++++++++++++++++ ad/ue_data/12375.csv | 2788 ++++++ ad/ue_data/12376.csv | 2787 ++++++ ad/ue_data/12377.csv | 2839 ++++++ ad/ue_data/12378.csv | 2801 ++++++ ad/ue_data/12379.csv | 2842 ++++++ ad/ue_data/12380.csv | 2811 ++++++ ad/ue_data/12381.csv | 2829 ++++++ ad/ue_data/12382.csv | 2827 ++++++ ad/ue_data/12383.csv | 2805 ++++++ ad/ue_data/12384.csv | 2781 ++++++ ad/ue_data/12385.csv | 2799 ++++++ ad/ue_data/12386.csv | 2833 ++++++ ad/ue_data/12387.csv | 2747 ++++++ ad/ue_data/12388.csv | 2818 ++++++ ad/ue_data/12389.csv | 2851 ++++++ ad/ue_data/12390.csv | 2827 ++++++ ad/ue_data/12391.csv | 2856 ++++++ ad/ue_data/12392.csv | 2808 ++++++ ad/ue_data/12393.csv | 2802 ++++++ ad/ue_data/12394.csv | 2783 ++++++ ad/ue_data/12395.csv | 2832 ++++++ ad/ue_data/12396.csv | 2783 ++++++ ad/ue_data/12397.csv | 2777 ++++++ ad/ue_data/12398.csv | 2820 
++++++ ad/ue_data/12399.csv | 2837 ++++++ ad/ue_data/12400.csv | 2781 ++++++ ad/ue_data/12401.csv | 2864 ++++++ ad/ue_data/12402.csv | 2839 ++++++ ad/ue_data/12403.csv | 2832 ++++++ ad/ue_data/12404.csv | 10349 ++++++++++++++++++++ ad/ue_data/12405.csv | 10349 ++++++++++++++++++++ ad/ue_data/12406.csv | 10349 ++++++++++++++++++++ ad/ue_data/12407.csv | 10349 ++++++++++++++++++++ ad/ue_data/12408.csv | 10349 ++++++++++++++++++++ ad/ue_data/12409.csv | 10349 ++++++++++++++++++++ ad/ue_data/12410.csv | 10349 ++++++++++++++++++++ ad/ue_data/12411.csv | 10349 ++++++++++++++++++++ ad/ue_data/12412.csv | 10349 ++++++++++++++++++++ ad/ue_data/12413.csv | 10349 ++++++++++++++++++++ ad/ue_data/12414.csv | 10349 ++++++++++++++++++++ ad/ue_data/12415.csv | 10349 ++++++++++++++++++++ ad/ue_data/12416.csv | 10349 ++++++++++++++++++++ ad/ue_data/12417.csv | 10349 ++++++++++++++++++++ ad/ue_data/12418.csv | 10349 ++++++++++++++++++++ ad/ue_data/12419.csv | 10349 ++++++++++++++++++++ ad/ue_data/12420.csv | 10349 ++++++++++++++++++++ ad/ue_data/12421.csv | 10349 ++++++++++++++++++++ ad/ue_data/12422.csv | 10349 ++++++++++++++++++++ ad/ue_data/12423.csv | 10349 ++++++++++++++++++++ ad/ue_data/12424.csv | 10349 ++++++++++++++++++++ ad/ue_data/12425.csv | 5160 ++++++++++ ad/ue_data/12426.csv | 5193 ++++++++++ ad/ue_data/12427.csv | 5202 ++++++++++ ad/ue_data/12428.csv | 5179 ++++++++++ ad/ue_data/12429.csv | 5170 ++++++++++ ad/ue_data/12430.csv | 5190 ++++++++++ ad/ue_data/12431.csv | 5199 ++++++++++ ad/ue_data/12432.csv | 5194 ++++++++++ ad/ue_data/12433.csv | 5174 ++++++++++ ad/ue_data/12434.csv | 5143 ++++++++++ ad/ue_data/12435.csv | 367 + ad/ue_data/12436.csv | 365 + ad/ue_data/12437.csv | 361 + ad/ue_data/12438.csv | 370 + ad/ue_data/12439.csv | 364 + ad/ue_data/12440.csv | 364 + ad/ue_data/12441.csv | 366 + ad/ue_data/12442.csv | 368 + ad/ue_data/12443.csv | 355 + ad/ue_data/12444.csv | 361 + ad/ue_seg.json | 1 + ad/ue_test.csv | 5201 ++++++++++ local.rt | 4 + setup.py | 29 + tests/__init__.py | 1 + tests/fixtures/test_local.rt | 18 + tests/testad.py | 34 + tox.ini | 73 + xapp-descriptor/config.json | 43 + 125 files changed, 428406 insertions(+), 7 deletions(-) create mode 100644 Dockerfile create mode 100644 LICENSE.txt create mode 100644 README.txt create mode 100644 ad/LabelEncoder create mode 100644 ad/__init__.py create mode 100644 ad/ad_model/__pycache__/ad_model.cpython-38.pyc create mode 100644 ad/ad_model/__pycache__/processing.cpython-38.pyc create mode 100644 ad/ad_model/__pycache__/tb_format.cpython-38.pyc create mode 100644 ad/ad_model/ad_model.py create mode 100644 ad/ad_model/processing.py create mode 100644 ad/ad_model/tb_format.py create mode 100644 ad/ad_train.py create mode 100644 ad/main.py create mode 100644 ad/scale create mode 100644 ad/test_route.rt create mode 100644 ad/ue_data/12345.csv create mode 100644 ad/ue_data/12346.csv create mode 100644 ad/ue_data/12347.csv create mode 100644 ad/ue_data/12348.csv create mode 100644 ad/ue_data/12349.csv create mode 100644 ad/ue_data/12350.csv create mode 100644 ad/ue_data/12351.csv create mode 100644 ad/ue_data/12352.csv create mode 100644 ad/ue_data/12353.csv create mode 100644 ad/ue_data/12354.csv create mode 100644 ad/ue_data/12355.csv create mode 100644 ad/ue_data/12356.csv create mode 100644 ad/ue_data/12357.csv create mode 100644 ad/ue_data/12358.csv create mode 100644 ad/ue_data/12359.csv create mode 100644 ad/ue_data/12360.csv create mode 100644 ad/ue_data/12361.csv create mode 100644 ad/ue_data/12362.csv create mode 100644 
ad/ue_data/12363.csv create mode 100644 ad/ue_data/12364.csv create mode 100644 ad/ue_data/12365.csv create mode 100644 ad/ue_data/12366.csv create mode 100644 ad/ue_data/12367.csv create mode 100644 ad/ue_data/12368.csv create mode 100644 ad/ue_data/12369.csv create mode 100644 ad/ue_data/12370.csv create mode 100644 ad/ue_data/12371.csv create mode 100644 ad/ue_data/12372.csv create mode 100644 ad/ue_data/12373.csv create mode 100644 ad/ue_data/12374.csv create mode 100644 ad/ue_data/12375.csv create mode 100644 ad/ue_data/12376.csv create mode 100644 ad/ue_data/12377.csv create mode 100644 ad/ue_data/12378.csv create mode 100644 ad/ue_data/12379.csv create mode 100644 ad/ue_data/12380.csv create mode 100644 ad/ue_data/12381.csv create mode 100644 ad/ue_data/12382.csv create mode 100644 ad/ue_data/12383.csv create mode 100644 ad/ue_data/12384.csv create mode 100644 ad/ue_data/12385.csv create mode 100644 ad/ue_data/12386.csv create mode 100644 ad/ue_data/12387.csv create mode 100644 ad/ue_data/12388.csv create mode 100644 ad/ue_data/12389.csv create mode 100644 ad/ue_data/12390.csv create mode 100644 ad/ue_data/12391.csv create mode 100644 ad/ue_data/12392.csv create mode 100644 ad/ue_data/12393.csv create mode 100644 ad/ue_data/12394.csv create mode 100644 ad/ue_data/12395.csv create mode 100644 ad/ue_data/12396.csv create mode 100644 ad/ue_data/12397.csv create mode 100644 ad/ue_data/12398.csv create mode 100644 ad/ue_data/12399.csv create mode 100644 ad/ue_data/12400.csv create mode 100644 ad/ue_data/12401.csv create mode 100644 ad/ue_data/12402.csv create mode 100644 ad/ue_data/12403.csv create mode 100644 ad/ue_data/12404.csv create mode 100644 ad/ue_data/12405.csv create mode 100644 ad/ue_data/12406.csv create mode 100644 ad/ue_data/12407.csv create mode 100644 ad/ue_data/12408.csv create mode 100644 ad/ue_data/12409.csv create mode 100644 ad/ue_data/12410.csv create mode 100644 ad/ue_data/12411.csv create mode 100644 ad/ue_data/12412.csv create mode 100644 ad/ue_data/12413.csv create mode 100644 ad/ue_data/12414.csv create mode 100644 ad/ue_data/12415.csv create mode 100644 ad/ue_data/12416.csv create mode 100644 ad/ue_data/12417.csv create mode 100644 ad/ue_data/12418.csv create mode 100644 ad/ue_data/12419.csv create mode 100644 ad/ue_data/12420.csv create mode 100644 ad/ue_data/12421.csv create mode 100644 ad/ue_data/12422.csv create mode 100644 ad/ue_data/12423.csv create mode 100644 ad/ue_data/12424.csv create mode 100644 ad/ue_data/12425.csv create mode 100644 ad/ue_data/12426.csv create mode 100644 ad/ue_data/12427.csv create mode 100644 ad/ue_data/12428.csv create mode 100644 ad/ue_data/12429.csv create mode 100644 ad/ue_data/12430.csv create mode 100644 ad/ue_data/12431.csv create mode 100644 ad/ue_data/12432.csv create mode 100644 ad/ue_data/12433.csv create mode 100644 ad/ue_data/12434.csv create mode 100644 ad/ue_data/12435.csv create mode 100644 ad/ue_data/12436.csv create mode 100644 ad/ue_data/12437.csv create mode 100644 ad/ue_data/12438.csv create mode 100644 ad/ue_data/12439.csv create mode 100644 ad/ue_data/12440.csv create mode 100644 ad/ue_data/12441.csv create mode 100644 ad/ue_data/12442.csv create mode 100644 ad/ue_data/12443.csv create mode 100644 ad/ue_data/12444.csv create mode 100644 ad/ue_seg.json create mode 100644 ad/ue_test.csv create mode 100644 local.rt create mode 100644 setup.py create mode 100644 tests/__init__.py create mode 100644 tests/fixtures/test_local.rt create mode 100644 tests/testad.py create mode 100644 tox.ini create mode 100644 
xapp-descriptor/config.json diff --git a/.gitreview b/.gitreview index 8ec417d..d2c3ba9 100644 --- a/.gitreview +++ b/.gitreview @@ -1,7 +1,5 @@ - - [gerrit] - host=gerrit.o-ran-sc.org - port=29418 - project=ric-app/ad - defaultbranch=master - \ No newline at end of file +[gerrit] +host=gerrit.o-ran-sc.org +port=29418 +project=ric-app/ad +defaultbranch=master diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..df9c2b8 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,41 @@ +# ================================================================================== +# Copyright (c) 2019 AT&T Intellectual Property. +# Copyright (c) 2020 HCL Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ================================================================================== +FROM frolvlad/alpine-miniconda3 +# RMR setup +RUN mkdir -p /opt/route/ + +# copy rmr files from builder image in lieu of an Alpine package +COPY --from=nexus3.o-ran-sc.org:10002/o-ran-sc/bldr-alpine3-rmr:4.0.5 /usr/local/lib64/librmr* /usr/local/lib64/ + +COPY --from=nexus3.o-ran-sc.org:10002/o-ran-sc/bldr-alpine3-rmr:4.0.5 /usr/local/bin/rmr* /usr/local/bin/ +ENV LD_LIBRARY_PATH /usr/local/lib/:/usr/local/lib64 +COPY local.rt /opt/route/local.rt +ENV RMR_SEED_RT /opt/route/local.rt + +RUN apk update && apk add gcc musl-dev +RUN pip install ricxappframe +RUN conda update -n base -c defaults conda +RUN conda install pandas +RUN conda install -c conda-forge/label/cf202003 hdbscan +RUN pip install schedule +RUN conda install scikit-learn +#RUN pip install -U scikit-learn + +COPY ad/ /tmp/ad +ENV PYTHONUNBUFFERED 1 +CMD python -W ignore /tmp/ad/main.py + diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..69a2cef --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,29 @@ + + Unless otherwise specified, all software contained herein is licensed + under the Apache License, Version 2.0 (the "Software License"); + you may not use this software except in compliance with the Software + License. You may obtain a copy of the Software License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the Software License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the Software License for the specific language governing permissions + and limitations under the Software License. + + + + Unless otherwise specified, all documentation contained herein is licensed + under the Creative Commons License, Attribution 4.0 Intl. (the + "Documentation License"); you may not use this documentation except in + compliance with the Documentation License. 
You may obtain a copy of the
+ Documentation License at
+
+ https://creativecommons.org/licenses/by/4.0/
+
+ Unless required by applicable law or agreed to in writing, documentation
+ distributed under the Documentation License is distributed on an "AS IS"
+ BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied. See the Documentation License for the specific language governing
+ permissions and limitations under the Documentation License.
diff --git a/README.txt b/README.txt
new file mode 100644
index 0000000..2e55350
--- /dev/null
+++ b/README.txt
@@ -0,0 +1,45 @@
+# ==================================================================================
+# Copyright (c) 2020 HCL Technologies Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==================================================================================
+
+Usage of all the programs and files is described below for reference.
+Update this file whenever any of the following components is modified.
+
+main.py:
+* Initiates the xapp api and runs entry() using xapp.run()
+* If the RF model is not present in the path, runs train() to train the model used for prediction.
+  Calls the predict function every 1 second (for now, since we are using simulated data).
+* Reads the input csv file that has both normal and anomalous data.
+* Simulates different UEIDs that participate in the anomaly by randomly selecting records from this scoring data set.
+* Sends the UEID and timestamp of the anomalous entries to Traffic Steering (rmr with message type 30003).
+* Gets the acknowledgement message from Traffic Steering.
+
+ad_train.py - Reads all the csv files in the current path and creates the trained model (RF).
+
+processing.py:
+It performs the following activities:
+* Columns that are not useful for prediction are dropped (UEID, Category, & Timestamp).
+* Converts integer and float types into a numeric data type.
+* Verifies and drops highly correlated parameters.
+* Returns UEID, timestamp and category for the anomalous entries.
+
+ad_model.py:
+* Extracts all the unique UEIDs and filters only the randomly selected UEID (this step will be removed once the UEID is fetched via SDL).
+* Calls the predict method to get the final data for the randomly selected UEID (an illustrative sketch of this flow is given below).
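Note: the ad/ad_model/ad_model.py hunk itself is garbled further down in this excerpt, so the following is only a hypothetical sketch of the two bullets above. The HDB_PREDICT name, its single dataframe argument and the UEID/MeasTimestampRF/Anomaly columns come from how main.py uses it; the random UEID selection, the /tmp/ad/hdbscan model path and the use of hdbscan.approximate_predict are assumptions, not the code from the patch.

    import random

    import hdbscan
    import joblib


    def HDB_PREDICT(df):
        """Hypothetical sketch: flag anomalous samples for one randomly chosen UEID."""
        ue = random.choice(list(df['UEID'].unique()))    # stand-in until the UEID comes via SDL
        sample = df[df['UEID'] == ue].copy()
        hdb = joblib.load('/tmp/ad/hdbscan')             # clusterer saved by modelling.dbscan() (assumed path)
        features = sample.drop(['UEID', 'MeasTimestampRF'], axis=1)
        labels, _ = hdbscan.approximate_predict(hdb, features)
        sample['Anomaly'] = (labels == -1).astype(int)   # HDBSCAN noise points treated as anomalies
        return sample

main.py would then keep only the rows with Anomaly == 1 and forward their UEID and MeasTimestampRF over rmr, as in the predict() function further down in this patch.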
+
+tb_format.py:
+* Starts the preprocessing and processing steps using the key columns.
+* Populates the current timestamp value for MeasTimestampRF.
+
diff --git a/ad/LabelEncoder b/ad/LabelEncoder
new file mode 100644
index 0000000000000000000000000000000000000000..d56e0729a4374a7812f66cedd7454e06e6e1eb3e
GIT binary patch
literal 571
[base85 payload omitted]
diff --git a/ad/__init__.py b/ad/__init__.py
new file mode 100644
index 0000000..d4f2f7e
--- /dev/null
+++ b/ad/__init__.py
@@ -0,0 +1,15 @@
+# ==================================================================================
+# Copyright (c) 2020 AT&T Intellectual Property.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==================================================================================
diff --git a/ad/ad_model/__pycache__/ad_model.cpython-38.pyc b/ad/ad_model/__pycache__/ad_model.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..36728bcf8d9b02015e002b6b3c47ad343f5438b7
GIT binary patch
literal 1957
[base85 payload omitted]
diff --git a/ad/ad_model/__pycache__/processing.cpython-38.pyc b/ad/ad_model/__pycache__/processing.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..50cd269830c7787aacc09bc4229ea9d5b343326e
GIT binary patch
literal 2540
[base85 payload omitted]
diff --git a/ad/ad_model/__pycache__/tb_format.cpython-38.pyc b/ad/ad_model/__pycache__/tb_format.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2dda52be7052ca8818333286f5e2cf359d1e5d8d
GIT binary patch
literal 917
[base85 payload omitted; the text resumes mid-way through the ad/ad_model/processing.py hunk]
0.98)]
+        self.data = self.data.drop(drop,axis=1)
+
+    # check the skewness of all parameters and use a log transform if half of the parameters are sufficiently skewed;
+    # otherwise use standardization
+    def transform(self):
+        """ Use StandardScaler and save the scaler """
+        scale = StandardScaler()
+        data = scale.fit_transform(self.data)
+        self.data = pd.DataFrame(data, columns = self.data.columns)
+        joblib.dump(scale, '/tmp/ad/scale')
+
+    def normalize(self):
+        """ Normalize the data """
+        upper = self.data.max()
+        lower = self.data.min()
+        self.data = (self.data - lower)/(upper-lower)
+
+    def process(self):
+        """ Calls the modules for data preprocessing: dropping columns, normalization, etc. """
+        self.numerical_data()
+        self.drop_na()
+        self.variation()
+#        self.correlation()
+        self.transform()
+        self.data.loc[:,'UEID'] = self.id
+        self.data.loc[:,'MeasTimestampRF'] = self.time
+        return self.data
diff --git a/ad/ad_model/tb_format.py b/ad/ad_model/tb_format.py
new file mode 100644
index 0000000..91f3c08
--- /dev/null
+++ b/ad/ad_model/tb_format.py
@@ -0,0 +1,36 @@
+# ==================================================================================
+# Copyright (c) 2020 HCL Technologies Limited.
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ================================================================================== + +import pandas as pd +from ad_model.processing import preprocess +import json, datetime + +UEKeyList = ['MeasTimestampRF','UEPDCPBytesDL', 'UEPDCPBytesUL', 'UEPRBUsageDL', 'UEPRBUsageUL','S_RSRP', 'S_RSRQ', 'S_SINR','UEID'] + +def parse(df): + """ + This block will be modified when we are going to fetch the data from database via sdl api. + + start the preprocessing, processing steps using the keycolumns + populates the current timestamp value for MeasTimestampRF + """ + df.index = range(df.shape[0]) + df = df[UEKeyList] + db = preprocess(df) + df = db.process() + del db + df['MeasTimestampRF'] = pd.date_range(start = datetime.datetime.now(), periods = len(df), freq = '-10ms') + return df diff --git a/ad/ad_train.py b/ad/ad_train.py new file mode 100644 index 0000000..b6aa843 --- /dev/null +++ b/ad/ad_train.py @@ -0,0 +1,137 @@ +# ================================================================================== +# Copyright (c) 2020 HCL Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ================================================================================== +import warnings +import json +import hdbscan +import pandas as pd +import numpy as np +import joblib, os +from ad_model.processing import preprocess +from sklearn.ensemble import RandomForestClassifier +from sklearn.metrics import accuracy_score, confusion_matrix,f1_score +from sklearn.preprocessing import LabelEncoder +from sklearn.model_selection import train_test_split + +# Ranges for input features based on excellent, good, average, & poor category +UEKeyList = ['MeasTimestampRF','UEPDCPBytesDL', 'UEPDCPBytesUL', 'UEPRBUsageDL', 'UEPRBUsageUL', 'S_RSRP', 'S_RSRQ', 'S_SINR','UEID'] +#UEKeyList = ['S_RSRP', 'S_RSRQ', 'S_SINR','UEID','MeasTimestampRF'] + +sigstr = {'S_RSRP': {'Excellent Signal' : [-80, 10000000000000000], 'Good Signal': [-90,-80], 'Average Signal':[-100,-90], 'Poor Signal':[-100000000000000000,-100]}, 'S_RSRQ' : {'Excellent Signal' : [-10, 10000000000000000], 'Good Signal': [-15,-10], 'Average Signal':[-20,-15], 'Poor Signal':[-100000000000000000,-20]}, 'S_SINR' : {'Excellent Signal' : [20, 10000000000000000], 'Good Signal': [13,20], 'Average Signal':[0,13], 'Poor Signal':[-100000000000000000,0]}} + +PRB = {'UEPRBUsageDL': {'Excellent Signal' : [25, 10000000000000000], 'Good Signal': [20,25], 'Average Signal':[10,20], 'Poor Signal':[-100000000000000000,10]}, 'UEPRBUsageUL' : {'Excellent Signal' : [15, 10000000000000000], 'Good Signal': [10,15], 'Average Signal':[5,10], 'Poor Signal':[-100000000000000000,5]}} + +tput = {'UEPDCPBytesDL': {'Excellent Signal' : [300000, 10000000000000000], 'Good Signal': [200000,300000], 'Average Signal':[100000,200000], 'Poor Signal':[-100000000000000000,100000]}, 'UEPDCPBytesUL' : {'Excellent Signal' : [125000, 10000000000000000], 'Good Signal': [100000,125000], 'Average Signal':[10000,100000], 'Poor Signal':[-100000000000000000,10000]}} + + +def category(df,ranges): + """ + Based on ranges, each sample is return with category(excellent, good, average, & poor category). + """ + data = df.copy() + for block in ranges: + df = data[list(block.keys())].copy() + for key, value in block.items(): + temp = data[list(block.keys())].copy() + for cat, bounds in value.items(): + ind = temp[(temp[key] <= bounds[1]) & (temp[key] > bounds[0])].index + df.loc[ind, key] = cat + data[df.columns] = df + category = data[['UEPDCPBytesDL', 'UEPDCPBytesUL', 'UEPRBUsageDL', 'UEPRBUsageUL', + 'S_RSRP', 'S_RSRQ', 'S_SINR']].mode(axis = 1)[0] + return category + + +class modelling(object): + def __init__(self,data): + self.time = data.MeasTimestampRF + self.id = data.UEID + self.data = data.drop(['UEID', 'MeasTimestampRF'], axis = 1) + + def dbscan(self): + """ + Train hdbscan for the input dataframe + save the hdbscan model + """ + df = self.data.copy() + hdb = hdbscan.HDBSCAN(min_cluster_size=16000, min_samples = 5, prediction_data = True).fit(df) + joblib.dump(hdb, '/tmp/ad/hdbscan') + self.data['Category'] = hdb.labels_ + + def RandomForest(self, y): + """ + Transform categorical label into numeric(Save the LabelEncoder). 
+ Create Train and Test split for Random Forest Classifier and Save the model + """ + df = self.data.copy() + le = LabelEncoder() + y = le.fit_transform(y) + joblib.dump(le, '/tmp/ad/LabelEncoder') + X_train, X_test, y_train, y_test = train_test_split(df, y, test_size=0.20, stratify=y, random_state=42) + rf = RandomForestClassifier(max_depth=9, random_state=0) + rf.fit(X_train, y_train) + + joblib.dump(rf, '/tmp/ad/RF') + print('--------------------------- Training Score------------------------------------') + score(X_test, y_test, rf) + print('--------------------------- Test Score------------------------------------') + test = pd.read_csv('/tmp/ad/ue_test.csv') + test = test[UEKeyList] + y = category(test, [sigstr, PRB, tput]) + y =le.transform(y) + ps = preprocess(test) + ps.process() + test = ps.data.drop(['UEID', 'MeasTimestampRF'], axis = 1) + score(test, y, rf) + +def score(X, y, model): + y_pred = model.predict(X) + print('Accuracy : {}'.format(accuracy_score(y, y_pred))) + + print('confusion matrix : {}'.format(confusion_matrix(y, y_pred))) + print('f1-score : {}'.format(f1_score(y, y_pred, average = 'macro'))) + + +def train(): + """ + Main function to perform training on input files + Read all the csv file in the current path and create trained model + """ + print('Training Starts : ') + path = '/tmp/ad/ue_data/' + df = pd.DataFrame() + # Read all the csv files and store the combined data into df + for file in os.listdir(path): + df = df.append(pd.read_csv(path + file)) + + df = df[UEKeyList] + df.index = range(len(df)) + y = category(df, [sigstr, PRB, tput]) + seg = {} + + #Save the category of each UEID and save it as json file + for ue in df.UEID.unique(): + seg[str(ue)] = list(set(y[df[df['UEID'] == ue].index])) + + with open('ue_seg.json', 'w') as outfile: + json.dump(seg, outfile) + + # Do a preprocessing, processing and save the model + ps = preprocess(df) + ps.process() + df = ps.data + db = modelling(df) +# db.dbscan() + db.RandomForest(y) diff --git a/ad/main.py b/ad/main.py new file mode 100644 index 0000000..9be6dc9 --- /dev/null +++ b/ad/main.py @@ -0,0 +1,73 @@ +# ================================================================================== +# Copyright (c) 2020 HCL Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ================================================================================== + +import warnings +import json +import os +from ricxappframe.xapp_frame import Xapp +import pandas as pd +from ad_model.tb_format import parse +from ad_model.ad_model import HDB_PREDICT +import schedule, time +from ad_train import train + +def entry(self): + """ + If RF model is not present in the path, run train() to train the model for the prediction. + Calls predict function for every 1 second(for now as we are using simulated data). 
+ """ + if not os.path.isfile('/tmp/ad/RF'): + train() + schedule.every(1).seconds.do(predict, self) + while True: + schedule.run_pending() + +def predict(self): + """ + Read the input csv file that has both normal and anomalous data. + Simulate diff UEIDs that participate in the anomaly by randomly selecting records from this scoring data set + Send the UEID and timestamp for the anomalous entries to the Traffic Steering (rmr with the message type as 30003) + Get the acknowledgement message from the traffic steering. + """ + + #The read_csv logic will be modified when we are going to fetch the data from database via sdl api. + #Read the input csv file + ue_data = pd.read_csv('/tmp/ad/ue_test.csv') + + #Parse the ue data and predict the anomaly records for the randomly selected UEID + data = parse(ue_data) + db_df = HDB_PREDICT(data) + db_df = db_df.loc[db_df['Anomaly'] == 1][['UEID','MeasTimestampRF' ]].head(1) + db_df['MeasTimestampRF'] = db_df['MeasTimestampRF'].apply(lambda x : str(x)) # converts into string format + #print("db_df: ", db_df) # For debug purpose, we can enable this print statement + + # rmr send 30003(TS_ANOMALY_UPDATE), should trigger registered callback + result = json.loads(db_df.to_json(orient = 'records')) + val = json.dumps(result).encode() + + if len(val) > 2 : + print("val: ", val) + self.rmr_send(val, 30003) + + # rmr receive to get the acknowledgement message from the traffic steering. + for (summary, sbuf) in self.rmr_get_messages(): + print("TS_ANOMALY_ACK: {}".format(summary)) + self.rmr_free(sbuf) + +# Initiates xapp api and runs the entry() using xapp.run() +xapp = Xapp(entrypoint=entry, rmr_port=4564, use_fake_sdl=True) +xapp.run() + diff --git a/ad/scale b/ad/scale new file mode 100644 index 0000000000000000000000000000000000000000..78b3af3730c74d2f26e049e0531ec9f71a55ceda GIT binary patch literal 715 zcmZo*nL2@y0StPii?ef56N~cn3W`z-it>|Fi;FY!()Hp~5=#=N^za3jB<7_g7NrCy zC+4IUO_@AJvvEr86pbFv^30Nq_}tXQyeS<$9AHjyNeYn7lAK>q38eY*;?q(SOG=AU zi{mr%;-`4C_weV%7boTxtO{e0n#ZYl?ACF5vELwDc%g;jO|l`rf7IG_b}Q_@$>WZ`VRzP!keLFN|G~2 zfC$8gQ`&&)dRReD1==W_m7kQ8nWP6cHNGG-8R+{dJ%WB9cQ}G94=+kAC_wfpM{#LV za!z7#@steS9(GV*fSd?&S#d@p&_&+ttw45uQ3_B~52G{Or5XG^Olc@C%@D?PXAf6m zPELM#d~R-H!IX~DB=aI=^V|YfM^)MR`??l?cQ8o5`Nx>e)S+=r;+Fd%QVvr}YJS~{ z3q9~XcN>??K8*u;Puze=(_yL_#J3DREM=1.1.1,<2.0.0"], + #entry_points={"console_scripts": ["run-ad.py=ad.main"]}, # adds a magical entrypoint for Docker + license="Apache 2.0", + data_files=[("", ["LICENSE.txt"])], +) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..279cb61 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ +"ric_app_ad" diff --git a/tests/fixtures/test_local.rt b/tests/fixtures/test_local.rt new file mode 100644 index 0000000..79a4c36 --- /dev/null +++ b/tests/fixtures/test_local.rt @@ -0,0 +1,18 @@ +# ================================================================================== +# Copyright (c) 2020 HCL Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ==================================================================================
+newrt|start
+rte|40001|127.0.0.1:4562
+newrt|end
diff --git a/tests/testad.py b/tests/testad.py
new file mode 100644
index 0000000..af67ad0
--- /dev/null
+++ b/tests/testad.py
@@ -0,0 +1,34 @@
+# ==================================================================================
+# Copyright (c) 2020 HCL Technologies Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==================================================================================
+from ricxappframe.xapp_frame import Xapp
+# from ad import main
+
+
+def test_init_adxapp(monkeypatch):
+    """Sketches the rmr exchange of the ad xapp; the entrypoint below is defined
+    but not run here, since sending requires a live RMR routing setup."""
+
+    # start ad
+    # main.predict()
+
+    def entry(self):
+        # rmr send 30003 (TS_ANOMALY_UPDATE), should trigger the registered callback
+        val = '[{"UEID": 12419, "MeasTimestampRF": "2020-11-11 13:28:25.135743"}]'
+        self.rmr_send(val.encode(), 30003)
+
+        # rmr receive to get the acknowledgement message from the traffic steering
+        for (summary, sbuf) in self.rmr_get_messages():
+            print("TS_ANOMALY_ACK: {}".format(summary))
+            self.rmr_free(sbuf)
diff --git a/tox.ini b/tox.ini
new file mode 100644
index 0000000..75f1f8a
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,73 @@
+# ==================================================================================
+# Copyright (c) 2020 AT&T Intellectual Property.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ================================================================================== +[tox] +envlist = code,flake8,docs,docs-linkcheck +minversion = 2.0 + +[testenv:code] +basepython = python3.8 +deps= + pytest + coverage + pytest-cov +setenv = + LD_LIBRARY_PATH = /usr/local/lib/:/usr/local/lib64 + RMR_SEED_RT = tests/fixtures/test_local.rt + RMR_ASYNC_CONN = 0 + USE_FAKE_SDL = 1 + +commands = + pytest -v --cov ad --cov-report xml --cov-report term-missing --cov-report html --cov-fail-under=70 + coverage xml -i + +[testenv:flake8] +basepython = python3.8 +skip_install = true +deps = flake8 +commands = flake8 setup.py ad tests + +[flake8] +extend-ignore = E501,E741,E731 + +[testenv:clm] +# use pip to gather dependencies with versions for CLM analysis +whitelist_externals = sh +commands = sh -c 'pip freeze > requirements.txt' + +# doc jobs +[testenv:docs] +whitelist_externals = echo +skipsdist = true +basepython = python3.8 +deps = + sphinx + sphinx-rtd-theme + sphinxcontrib-httpdomain + recommonmark + lfdocs-conf +commands = + sphinx-build -W -b html -n -d {envtmpdir}/doctrees ./docs/ {toxinidir}/docs/_build/html + echo "Generated docs available in {toxinidir}/docs/_build/html" + +[testenv:docs-linkcheck] +skipsdist = true +basepython = python3.8 +deps = sphinx + sphinx-rtd-theme + sphinxcontrib-httpdomain + recommonmark + lfdocs-conf +commands = sphinx-build -W -b linkcheck -d {envtmpdir}/doctrees ./docs/ {toxinidir}/docs/_build/linkcheck diff --git a/xapp-descriptor/config.json b/xapp-descriptor/config.json new file mode 100644 index 0000000..ae48a24 --- /dev/null +++ b/xapp-descriptor/config.json @@ -0,0 +1,43 @@ +{ + "xapp_name": "ad", + "version": "0.0.1", + "containers": [ + { + "name": "ad", + "image": { + "registry": "nexus3.o-ran-sc.org:10002", + "name": "o-ran-sc/ric-app-ad", + "tag": "0.0.1" + } + } + ], + "messaging": { + "ports": [ + { + "name": "rmr-data", + "container": "ad", + "port": 4560, + "txMessages": ["TS_ANOMALY_UPDATE"], + "rxMessages": ["TS_ANOMALY_ACK"], + "policies": [], + "description": "rmr receive data port for ad" + }, + { + "name": "rmr-route", + "container": "ad", + "port": 4561, + "description": "rmr route port for ad" + } + ] + }, + "rmr": { + "protPort": "tcp:4560", + "maxSize": 2072, + "numWorkers": 1, + "rxMessages": ["TS_ANOMALY_ACK"], + "txMessages": ["TS_ANOMALY_UPDATE"], + "policies": [] + } + +} + -- 2.16.6
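A note on the training step in ad/ad_train.py: category() labels every sample by mapping each KPI into a band and then taking the most frequent band across the KPIs. Below is a minimal, self-contained restatement of that idea, not the code from the patch; the helper names are illustrative and only the three signal-strength KPIs from the sigstr dictionary are shown.

    import numpy as np
    import pandas as pd

    # Bands copied from the sigstr dictionary in ad/ad_train.py: a value belongs
    # to a band when lower < value <= upper.
    BANDS = {
        'S_RSRP': [(-80, np.inf, 'Excellent Signal'), (-90, -80, 'Good Signal'),
                   (-100, -90, 'Average Signal'), (-np.inf, -100, 'Poor Signal')],
        'S_RSRQ': [(-10, np.inf, 'Excellent Signal'), (-15, -10, 'Good Signal'),
                   (-20, -15, 'Average Signal'), (-np.inf, -20, 'Poor Signal')],
        'S_SINR': [(20, np.inf, 'Excellent Signal'), (13, 20, 'Good Signal'),
                   (0, 13, 'Average Signal'), (-np.inf, 0, 'Poor Signal')],
    }

    def band(kpi, value):
        """Return the band label that this KPI value falls into."""
        for lower, upper, label in BANDS[kpi]:
            if lower < value <= upper:
                return label

    # One deliberately poor sample: the per-KPI bands are Poor, Average, Poor,
    # so the mode -- the label category() would assign -- is 'Poor Signal'.
    sample = {'S_RSRP': -105.0, 'S_RSRQ': -17.0, 'S_SINR': -2.0}
    labels = pd.Series({kpi: band(kpi, value) for kpi, value in sample.items()})
    print(labels.mode()[0])

The category() in the patch applies the same banding to the PRB and PDCP throughput counters as well, and the set of bands seen per UEID is what train() writes out to ue_seg.json.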