Revise Alarm manager to send via RMR wormhole
[ric-plt/xapp-frame-py.git] / ricxappframe / alarm / alarm.py
1 # ==================================================================================
2 #       Copyright (c) 2020 AT&T Intellectual Property.
3 #       Copyright (c) 2020 Nokia
4 #
5 #   Licensed under the Apache License, Version 2.0 (the "License");
6 #   you may not use this file except in compliance with the License.
7 #   You may obtain a copy of the License at
8 #
9 #          http://www.apache.org/licenses/LICENSE-2.0
10 #
11 #   Unless required by applicable law or agreed to in writing, software
12 #   distributed under the License is distributed on an "AS IS" BASIS,
13 #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 #   See the License for the specific language governing permissions and
15 #   limitations under the License.
16 # ==================================================================================
17 """
18 Provides classes and methods to define, raise, reraise and clear alarms.
19 All actions are implemented by sending RMR messages to the Alarm Adapter.
20 The alarm target host and port are set by environment variables. The alarm
21 message contents comply with the JSON schema in file alarm-schema.json.
22 """
23
24 from ctypes import c_void_p
25 from enum import Enum, auto
26 import json
27 import os
28 import time
29 from mdclogpy import Logger
30 from ricxappframe.rmr import rmr
31 from ricxappframe.alarm.exceptions import InitFailed
32
33 ##############
34 # PRIVATE API
35 ##############
36
# Upper bound on how many times one alarm message is handed to RMR
# before the send is reported as failed.
RETRIES = 4
# Logger shared by all functions and classes in this module.
mdc_logger = Logger(name=__name__)
39
40 ##############
41 # PUBLIC API
42 ##############
43
# RMR message type placed on every alarm message sent by this module.
RIC_ALARM_UPDATE = 110

# Names of the environment variables that hold the alarm target's
# host (service) name and port number.
ALARM_MGR_SERVICE_NAME_ENV = "ALARM_MGR_SERVICE_NAME"
ALARM_MGR_SERVICE_PORT_ENV = "ALARM_MGR_SERVICE_PORT"

# Keys of the alarm message dictionary, published as constants for the
# convenience of client code. The casing is deliberately mixed to comply
# with the Adapter JSON requirements.
KEY_ALARM = "alarm"

# -- keys describing the alarm itself
KEY_MANAGED_OBJECT_ID = "managedObjectId"
KEY_APPLICATION_ID = "applicationId"
KEY_SPECIFIC_PROBLEM = "specificProblem"
KEY_PERCEIVED_SEVERITY = "perceivedSeverity"
KEY_ADDITIONAL_INFO = "additionalInfo"
KEY_IDENTIFYING_INFO = "identifyingInfo"

# -- keys added when the alarm is wrapped into a message for sending
KEY_ALARM_ACTION = "AlarmAction"
KEY_ALARM_TIME = "AlarmTime"
60
61
class AlarmAction(Enum):
    """
    Operations that can be requested from the Alarm Adapter.

    Members are numbered consecutively starting at 1. The alarm message
    carries the member *name*, not the number.
    """
    RAISE = 1      # raise a single alarm
    CLEAR = 2      # clear a single alarm
    CLEARALL = 3   # clear all alarms
69
70
class AlarmSeverity(Enum):
    """
    Severity levels that can be attached to an alarm.

    Members are numbered consecutively starting at 1. The alarm detail
    stores the member *name*, not the number.
    """
    UNSPECIFIED = 1
    CRITICAL = 2
    MAJOR = 3
    MINOR = 4
    WARNING = 5
    CLEARED = 6
    DEFAULT = 7
82
83
class AlarmDetail(dict):
    """
    Dictionary holding the description of one alarm that can be raised
    or cleared. Every constructor argument is stored under the matching
    KEY_* constant of this module.

    Parameters
    ----------
    managed_object_id: str
        The name of the managed object that is the cause of the fault (required)

    application_id: str
        The name of the process that raised the alarm (required)

    specific_problem: int
        The problem that is the cause of the alarm

    perceived_severity: AlarmSeverity
        The severity of the alarm, a value from the enum. Only the name
        of the enum member is stored.

    identifying_info: str
        Identifying additional information, which is part of alarm identity

    additional_info: str
        Additional information given by the application (optional)
    """
    # pylint: disable=too-many-arguments
    def __init__(self,
                 managed_object_id: str,
                 application_id: str,
                 specific_problem: int,
                 perceived_severity: AlarmSeverity,
                 identifying_info: str,
                 additional_info: str = ""):
        """
        Builds the dictionary from the supplied values.
        """
        super().__init__()
        # Table of (key, value) pairs; the order here is the insertion
        # order of the resulting dictionary.
        entries = (
            (KEY_MANAGED_OBJECT_ID, managed_object_id),
            (KEY_APPLICATION_ID, application_id),
            (KEY_SPECIFIC_PROBLEM, specific_problem),
            # store the enum member's name so the dict contains plain strings
            (KEY_PERCEIVED_SEVERITY, perceived_severity.name),
            (KEY_IDENTIFYING_INFO, identifying_info),
            (KEY_ADDITIONAL_INFO, additional_info),
        )
        for key, value in entries:
            self[key] = value
126
127
class AlarmManager:
    """
    Provides an API for an Xapp to raise and clear alarms by sending messages
    via RMR directly to an Alarm Adapter. Requires environment variables
    ALARM_MGR_SERVICE_NAME and ALARM_MGR_SERVICE_PORT with the destination host
    (service) name and port number; raises an exception if not found.

    Parameters
    ----------
    vctx: ctypes c_void_p
        Pointer to RMR context obtained by initializing RMR.
        The context is used to allocate space and send messages.

    managed_object_id: str
        The name of the managed object that raises alarms

    application_id: str
        The name of the process that raises alarms

    Raises
    ------
    InitFailed
        If either environment variable is missing or empty, or if the
        RMR wormhole to the target is not in state RMR_OK after opening.
    """
    def __init__(self,
                 vctx: c_void_p,
                 managed_object_id: str,
                 application_id: str):
        """
        Creates an alarm manager and opens the RMR wormhole to the target
        named by the environment variables.
        """
        self.vctx = vctx
        self.managed_object_id = managed_object_id
        self.application_id = application_id
        service = os.environ.get(ALARM_MGR_SERVICE_NAME_ENV)
        port = os.environ.get(ALARM_MGR_SERVICE_PORT_ENV)
        # An empty value is as unusable as a missing one: it would yield a
        # malformed "host:port" target, so reject it here with a clear message.
        if not service or not port:
            mdc_logger.error("init: missing env var(s) {0}, {1}".format(ALARM_MGR_SERVICE_NAME_ENV, ALARM_MGR_SERVICE_PORT_ENV))
            raise InitFailed
        target = "{0}:{1}".format(service, port)
        self._wormhole_id = rmr.rmr_wh_open(self.vctx, target.encode('utf-8'))
        if rmr.rmr_wh_state(self.vctx, self._wormhole_id) != rmr.RMR_OK:
            mdc_logger.error("init: failed to open wormhole to target {}".format(target))
            raise InitFailed

    def create_alarm(self,
                     specific_problem: int,
                     perceived_severity: AlarmSeverity,
                     identifying_info: str,
                     additional_info: str = ""):
        """
        Convenience method that creates an alarm instance, an AlarmDetail object,
        using cached values for the managed object ID and application ID.

        Parameters
        ----------
        specific_problem: int
            The problem that is the cause of the alarm

        perceived_severity: AlarmSeverity
            The severity of the alarm, a value from the enum.

        identifying_info: str
            Identifying additional information, which is part of alarm identity

        additional_info: str
            Additional information given by the application (optional)

        Returns
        -------
        AlarmDetail
        """
        return AlarmDetail(managed_object_id=self.managed_object_id,
                           application_id=self.application_id,
                           specific_problem=specific_problem, perceived_severity=perceived_severity,
                           identifying_info=identifying_info, additional_info=additional_info)

    @staticmethod
    def _create_alarm_message(alarm: AlarmDetail, action: AlarmAction):
        """
        Creates a dict with the specified alarm detail plus action and time.
        Uses the current system time in milliseconds since the Epoch.

        Parameters
        ----------
        alarm: AlarmDetail
            The alarm details.

        action: AlarmAction
            The action to perform at the Alarm Adapter on this alarm.

        Returns
        -------
        dict
            A new dictionary; the supplied alarm is not modified.
        """
        return {
            **alarm,
            KEY_ALARM_ACTION: action.name,
            KEY_ALARM_TIME: int(round(time.time() * 1000))
        }

    def _rmr_send_alarm(self, msg: dict):
        """
        Serializes the dict and sends the result via RMR using a predefined message
        type to the wormhole initialized at start. The send is attempted up to
        RETRIES times, and repeated only while RMR reports RMR_ERR_RETRY.

        Parameters
        ----------
        msg: dict
            Dictionary with alarm message to encode and send

        Returns
        -------
        bool
            True if the send succeeded (possibly with retries), False otherwise
        """
        payload = json.dumps(msg).encode()
        mdc_logger.debug("_rmr_send_alarm: payload is {}".format(payload))
        sbuf = rmr.rmr_alloc_msg(vctx=self.vctx, size=len(payload), payload=payload,
                                 mtype=RIC_ALARM_UPDATE, gen_transaction_id=True)

        state = None   # message state reported after the most recent attempt
        attempts = 0   # number of send attempts actually made
        try:
            for attempt in range(RETRIES):
                # the send call returns the buffer to use from now on
                sbuf = rmr.rmr_wh_send_msg(self.vctx, self._wormhole_id, sbuf)
                attempts = attempt + 1
                state = rmr.message_summary(sbuf)[rmr.RMR_MS_MSG_STATE]
                mdc_logger.debug("_rmr_send_alarm: try {0} result is {1}".format(attempt, state))
                # stop trying if RMR does not indicate retry
                if state != rmr.RMR_ERR_RETRY:
                    break
        finally:
            # release the buffer even if the send or the summary raised
            rmr.rmr_free_msg(sbuf)

        if state != rmr.RMR_OK:
            # report what really happened: a hard error stops after one attempt
            mdc_logger.warning("_rmr_send_alarm: failed after {0} attempt(s), last state {1}".format(attempts, state))
            return False

        return True

    def raise_alarm(self, detail: AlarmDetail):
        """
        Builds and sends a message to the AlarmAdapter to raise an alarm
        with the specified detail.

        Parameters
        ----------
        detail: AlarmDetail
            Alarm to raise

        Returns
        -------
        bool
            True if the send succeeded (possibly with retries), False otherwise
        """
        msg = self._create_alarm_message(detail, AlarmAction.RAISE)
        return self._rmr_send_alarm(msg)

    def clear_alarm(self, detail: AlarmDetail):
        """
        Builds and sends a message to the AlarmAdapter to clear the alarm
        with the specified detail.

        Parameters
        ----------
        detail: AlarmDetail
            Alarm to clear

        Returns
        -------
        bool
            True if the send succeeded (possibly with retries), False otherwise
        """
        msg = self._create_alarm_message(detail, AlarmAction.CLEAR)
        return self._rmr_send_alarm(msg)

    def reraise_alarm(self, detail: AlarmDetail):
        """
        Builds and sends a message to the AlarmAdapter to clear the alarm with
        the specified detail, then builds and sends a message to raise the alarm again.
        The raise message is sent only if the clear message was sent successfully.

        Parameters
        ----------
        detail: AlarmDetail
            Alarm to clear and raise again.

        Returns
        -------
        bool
            True if both sends succeeded (possibly with retries), False otherwise
        """
        success = self.clear_alarm(detail)
        if success:
            success = self.raise_alarm(detail)
        return success

    def clear_all_alarms(self):
        """
        Builds and sends a message to the AlarmAdapter to clear all alarms.
        The message carries a placeholder alarm detail (problem 0, severity
        DEFAULT, empty info strings) with action CLEARALL.

        Returns
        -------
        bool
            True if the send succeeded (possibly with retries), False otherwise
        """
        detail = self.create_alarm(0, AlarmSeverity.DEFAULT, "", "")
        msg = self._create_alarm_message(detail, AlarmAction.CLEARALL)
        return self._rmr_send_alarm(msg)