X-Git-Url: https://gerrit.o-ran-sc.org/r/gitweb?a=blobdiff_plain;f=a1%2Fa1rmr.py;h=58ec1c06958ac80460805059b904863f54e45321;hb=refs%2Ftags%2F2.1.4;hp=d6114bf38e232112a91dfddb445eae1672e338d2;hpb=6b69910923309e05820706dc025e1441463906c9;p=ric-plt%2Fa1.git diff --git a/a1/a1rmr.py b/a1/a1rmr.py index d6114bf..58ec1c0 100644 --- a/a1/a1rmr.py +++ b/a1/a1rmr.py @@ -1,6 +1,9 @@ +""" +a1s rmr functionality +""" # ================================================================================== -# Copyright (c) 2019 Nokia -# Copyright (c) 2018-2019 AT&T Intellectual Property. +# Copyright (c) 2019-2020 Nokia +# Copyright (c) 2018-2020 AT&T Intellectual Property. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -18,153 +21,197 @@ import os import queue import time import json +from threading import Thread from rmr import rmr, helpers -from a1 import get_module_logger -from a1 import data +from mdclogpy import Logger +from a1 import data, messages from a1.exceptions import PolicyTypeNotFound, PolicyInstanceNotFound -logger = get_module_logger(__name__) +mdc_logger = Logger(name=__name__) -RETRY_TIMES = int(os.environ.get("RMR_RETRY_TIMES", 4)) +RETRY_TIMES = int(os.environ.get("A1_RMR_RETRY_TIMES", 4)) +A1_POLICY_REQUEST = 20010 +A1_POLICY_RESPONSE = 20011 +A1_POLICY_QUERY = 20012 -_SEND_QUEUE = queue.Queue() # thread safe queue https://docs.python.org/3/library/queue.html +# Note; yes, globals are bad, but this is a private (to this module) global +# No other module can import/access this (well, python doesn't enforce this, but all linters will complain) +__RMR_LOOP__ = None -def _init_rmr(): + +class _RmrLoop: """ - init an rmr context - This gets monkeypatched out for unit testing + class represents an rmr loop that constantly reads from rmr and performs operations based on waiting messages + this launches a thread, it should probably only be called once; the public facing method to access these ensures this """ - # rmr.RMRFL_MTCALL puts RMR into a multithreaded mode, where a receiving thread populates an - # internal ring of messages, and receive calls read from that - # currently the size is 2048 messages, so this is fine for the foreseeable future - mrc = rmr.rmr_init(b"4562", rmr.RMR_MAX_RCV_BYTES, rmr.RMRFL_MTCALL) - - while rmr.rmr_ready(mrc) == 0: - time.sleep(0.5) - return mrc + def __init__(self, init_func_override=None, rcv_func_override=None): + self.keep_going = True + self.rcv_func = None + self.last_ran = time.time() + + # see docs/overview#resiliency for a discussion of this + self.instance_send_queue = queue.Queue() # thread safe queue https://docs.python.org/3/library/queue.html + + # intialize rmr context + if init_func_override: + self.mrc = init_func_override() + else: + mdc_logger.debug("Waiting for rmr to initialize..") + # rmr.RMRFL_MTCALL puts RMR into a multithreaded mode, where a receiving thread populates an + # internal ring of messages, and receive calls read from that + # currently the size is 2048 messages, so this is fine for the foreseeable future + self.mrc = rmr.rmr_init(b"4562", rmr.RMR_MAX_RCV_BYTES, rmr.RMRFL_MTCALL) + while rmr.rmr_ready(self.mrc) == 0: + time.sleep(0.5) + + # set the receive function + self.rcv_func = ( + rcv_func_override + if rcv_func_override + else lambda: helpers.rmr_rcvall_msgs_raw(self.mrc, [A1_POLICY_RESPONSE, A1_POLICY_QUERY]) + ) + + # start the work loop + self.thread = Thread(target=self.loop) + self.thread.start() + + def _assert_good_send(self, sbuf, pre_send_summary): + """ + common helper function for _send_msg and _rts_msg + """ + post_send_summary = rmr.message_summary(sbuf) + if post_send_summary["message state"] == 0 and post_send_summary["message status"] == "RMR_OK": + return True + mdc_logger.debug("Message NOT sent!") + mdc_logger.debug("Pre-send summary: {0}, Post-send summary: {1}".format(pre_send_summary, post_send_summary)) + return False + def _send_msg(self, pay, mtype, subid): + """ + sends a msg + """ + for _ in range(0, RETRY_TIMES): + sbuf = rmr.rmr_alloc_msg(self.mrc, len(pay), payload=pay, gen_transaction_id=True, mtype=mtype, sub_id=subid) + sbuf.contents.sub_id = subid + pre_send_summary = rmr.message_summary(sbuf) + sbuf = rmr.rmr_send_msg(self.mrc, sbuf) # send + if self._assert_good_send(sbuf, pre_send_summary): + rmr.rmr_free_msg(sbuf) # free + break + + def _rts_msg(self, pay, sbuf_rts, mtype): + """ + sends a message using rts + we do not call free here because we may rts many times; it is called after the rts loop + """ + for _ in range(0, RETRY_TIMES): + pre_send_summary = rmr.message_summary(sbuf_rts) + sbuf_rts = rmr.rmr_rts_msg(self.mrc, sbuf_rts, payload=pay, mtype=mtype) + if self._assert_good_send(sbuf_rts, pre_send_summary): + break + return sbuf_rts # in some cases rts may return a new sbuf -def _send(mrc, payload, message_type=0): - """ - Sends a message up to RETRY_TIMES - If the message is sent successfully, it returns the transactionid - Does nothing otherwise - """ - # TODO: investigate moving this below and allocating the space based on the payload size - sbuf = rmr.rmr_alloc_msg(mrc, 4096) - payload = payload if isinstance(payload, bytes) else payload.encode("utf-8") - - # retry RETRY_TIMES to send the message - for _ in range(0, RETRY_TIMES): - # setup the send message - rmr.set_payload_and_length(payload, sbuf) - rmr.generate_and_set_transaction_id(sbuf) - sbuf.contents.state = 0 - sbuf.contents.mtype = message_type - pre_send_summary = rmr.message_summary(sbuf) - logger.debug("Pre message send summary: %s", pre_send_summary) - transaction_id = pre_send_summary["transaction id"] # save the transactionid because we need it later - - # send - sbuf = rmr.rmr_send_msg(mrc, sbuf) - post_send_summary = rmr.message_summary(sbuf) - logger.debug("Post message send summary: %s", rmr.message_summary(sbuf)) + def loop(self): + """ + This loop runs forever, and has 3 jobs: + - send out any messages that have to go out (create instance, delete instance) + - read a1s mailbox and update the status of all instances based on acks from downstream policy handlers + - clean up the database (eg delete the instance) under certain conditions based on those statuses (NOT DONE YET) + """ + # loop forever + mdc_logger.debug("Work loop starting") + while self.keep_going: + + # send out all messages waiting for us + while not self.instance_send_queue.empty(): + work_item = self.instance_send_queue.get(block=False, timeout=None) + payload = json.dumps(messages.a1_to_handler(*work_item)).encode("utf-8") + self._send_msg(payload, A1_POLICY_REQUEST, work_item[1]) + + # read our mailbox + for (msg, sbuf) in self.rcv_func(): + # TODO: in the future we may also have to catch SDL errors + try: + mtype = msg["message type"] + except (KeyError, TypeError, json.decoder.JSONDecodeError): + mdc_logger.debug("Dropping malformed policy ack/query message: {0}".format(msg)) + + if mtype == A1_POLICY_RESPONSE: + try: + # got a policy response, update status + pay = json.loads(msg["payload"]) + data.set_policy_instance_status( + pay["policy_type_id"], pay["policy_instance_id"], pay["handler_id"], pay["status"] + ) + mdc_logger.debug("Successfully received status update: {0}".format(pay)) + except (PolicyTypeNotFound, PolicyInstanceNotFound): + mdc_logger.debug("Received a response for a non-existent instance") + except (KeyError, TypeError, json.decoder.JSONDecodeError): + mdc_logger.debug("Dropping malformed policy ack message: {0}".format(msg)) + + elif mtype == A1_POLICY_QUERY: + try: + # got a query, do a lookup and send out all instances + pti = json.loads(msg["payload"])["policy_type_id"] + mdc_logger.debug("Received query for: {0}".format(pti)) + for pii in data.get_instance_list(pti): + instance = data.get_policy_instance(pti, pii) + payload = json.dumps(messages.a1_to_handler("CREATE", pti, pii, instance)).encode("utf-8") + sbuf = self._rts_msg(payload, sbuf, A1_POLICY_REQUEST) + except (PolicyTypeNotFound, PolicyInstanceNotFound): + mdc_logger.debug("Received a query for a non-existent type: {0}".format(msg)) + except (KeyError, TypeError, json.decoder.JSONDecodeError): + mdc_logger.debug("Dropping malformed policy query message: {0}".format(msg)) + + else: + mdc_logger.debug("Received message type {0} but A1 does not handle this".format(mtype)) + + # we must free each sbuf + rmr.rmr_free_msg(sbuf) + + self.last_ran = time.time() + time.sleep(1) - # check success or failure - if post_send_summary["message state"] == 0 and post_send_summary["message status"] == "RMR_OK": - # we are good - logger.debug("Message sent successfully!") - rmr.rmr_free_msg(sbuf) - return transaction_id - # we failed all RETRY_TIMES - logger.debug("Send failed all %s times, stopping", RETRY_TIMES) - rmr.rmr_free_msg(sbuf) - return None +# Public -def _update_all_statuses(mrc): +def start_rmr_thread(init_func_override=None, rcv_func_override=None): """ - get all waiting messages, and try to parse them as status updates - (currently, those are the only messages a1 should get, this may have to be revisited later) + Start a1s rmr thread """ - for msg in helpers.rmr_rcvall_msgs(mrc, [21024]): - try: - pay = json.loads(msg["payload"]) - data.set_status(pay["policy_type_id"], pay["policy_instance_id"], pay["handler_id"], pay["status"]) - except (PolicyTypeNotFound, PolicyInstanceNotFound, KeyError): - logger.debug("Dropping malformed or non applicable message") - logger.debug(msg) + global __RMR_LOOP__ + if __RMR_LOOP__ is None: + __RMR_LOOP__ = _RmrLoop(init_func_override, rcv_func_override) -# Public +def stop_rmr_thread(): + """ + stops the rmr thread + """ + __RMR_LOOP__.keep_going = False -def queue_work(item): +def queue_instance_send(item): """ push an item into the work queue currently the only type of work is to send out messages """ - _SEND_QUEUE.put(item) + __RMR_LOOP__.instance_send_queue.put(item) -class RmrLoop: +def healthcheck_rmr_thread(seconds=30): """ - class represents an rmr loop meant to be called as a longstanding separate thread + returns a boolean representing whether the rmr loop is healthy, by checking two attributes: + 1. is it running?, + 2. is it stuck in a long (> seconds) loop? """ + return __RMR_LOOP__.thread.is_alive() and ((time.time() - __RMR_LOOP__.last_ran) < seconds) - def __init__(self, real_init=True): - self._rmr_is_ready = False - self._keep_going = True - self._real_init = real_init # useful for unit testing to turn off initialization - - def rmr_is_ready(self): - """returns whether rmr has been initialized""" - return self._rmr_is_ready - - def stop(self): - """sets a flag for the loop to end""" - self._keep_going = False - def loop(self): - """ - This loop runs in an a1 thread forever, and has 3 jobs: - - send out any messages that have to go out (create instance, delete instance) - - read a1s mailbox and update the status of all instances based on acks from downstream policy handlers - - clean up the database (eg delete the instance) under certain conditions based on those statuses (NOT DONE YET) - """ - - # get a context - mrc = None - logger.debug("Waiting for rmr to initialize...") - if self._real_init: - mrc = _init_rmr() - self._rmr_is_ready = True - logger.debug("Rmr is ready") - - # loop forever - logger.debug("Work loop starting") - while self._keep_going: - """ - We never raise an exception here. Log and keep moving - Bugs will eventually be caught be examining logs. - """ - try: - # First, send out all messages waiting for us - while not _SEND_QUEUE.empty(): - work_item = _SEND_QUEUE.get(block=False, timeout=None) - _send(mrc, payload=work_item["payload"], message_type=work_item["msg type"]) - - # Next, update all statuses waiting in a1s mailbox - _update_all_statuses(mrc) - - # TODO: next body of work is to try to clean up the database for any updated statuses - - except Exception as e: - logger.debug("Polling thread encountered an unexpected exception, but it will continue:") - logger.exception(e) - - time.sleep(1) +def replace_rcv_func(rcv_func): + """purely for the ease of unit testing to test different rcv scenarios""" + __RMR_LOOP__.rcv_func = rcv_func