2 * Copyright (c) 2020 AT&T Intellectual Property.
3 * Copyright (c) 2020 Nokia.
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
17 * This source code is part of the near-RT RIC (RAN Intelligent Controller)
18 * platform project (RICP).
32 "gerrit.o-ran-sc.org/r/ric-plt/alarm-go/alarm"
33 app "gerrit.o-ran-sc.org/r/ric-plt/xapp-frame/pkg/xapp"
34 clientruntime "github.com/go-openapi/runtime/client"
35 "github.com/go-openapi/strfmt"
36 "github.com/prometheus/alertmanager/api/v2/client"
37 "github.com/prometheus/alertmanager/api/v2/client/alert"
38 "github.com/prometheus/alertmanager/api/v2/models"
39 "github.com/spf13/viper"
// StartAlertTimer re-posts the alarms held in a.activeAlarms to the alert manager as active alerts.
// It creates a tick channel whose period is a.alertInterval, interpreted as milliseconds.
// Run launches this method with a go statement.
// NOTE(review): before Go 1.23 a ticker created by time.Tick is never reclaimed; that is only
// acceptable if this method runs for the whole process lifetime — confirm.
42 func (a *AlarmManager) StartAlertTimer() {
43 tick := time.Tick(time.Duration(a.alertInterval) * time.Millisecond)
// Every active alarm is logged and posted again; the values returned by PostAlert are discarded.
46 for _, m := range a.activeAlarms {
47 app.Logger.Info("Re-raising alarm: %v", m)
48 a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive, m.AlarmTime))
// Consume is the handler for a received RMR message described by rp.
// It logs the reception and distinguishes messages by rp.Mtype: alarm.RIC_ALARM_UPDATE has its own
// case, any other type is logged as unknown and discarded.
54 func (a *AlarmManager) Consume(rp *app.RMRParams) (err error) {
55 app.Logger.Info("Message received!")
// The message buffer is released when Consume returns, whichever path is taken.
57 defer app.Rmr.Free(rp.Mbuf)
59 case alarm.RIC_ALARM_UPDATE:
62 app.Logger.Info("Unknown Message Type '%d', discarding", rp.Mtype)
// HandleAlarms decodes rp.Payload as a JSON-encoded alarm.AlarmMessage and hands the result to
// ProcessAlarm, whose return values it passes on.
// A payload that cannot be decoded is logged at error level.
68 func (a *AlarmManager) HandleAlarms(rp *app.RMRParams) (*alert.PostAlertsOK, error) {
69 var m alarm.AlarmMessage
70 app.Logger.Info("Received JSON: %s", rp.Payload)
71 if err := json.Unmarshal(rp.Payload, &m); err != nil {
72 app.Logger.Error("json.Unmarshal failed: %v", err)
75 app.Logger.Info("newAlarm: %v", m)
// The message is wrapped together with an empty alarm definition.
77 return a.ProcessAlarm(&AlarmNotification{m, alarm.AlarmDefinition{}})
// ProcessAlarm routes an incoming alarm notification m.
//
// Steps visible in this function:
//   - m.Alarm.SpecificProblem is looked up in alarm.RICAlarmDefinitions; an unknown value is
//     reported with a warning as suppressed;
//   - IsMatchFound tells whether the same alarm is already in the active list;
//   - a raise of an already active alarm is treated as a duplicate; when its severity differs from
//     the stored one, the stored entry is removed from the active list;
//   - a clear of an active alarm is delegated to ProcessClearAlarm;
//   - a raise is delegated to ProcessRaiseAlarm.
80 func (a *AlarmManager) ProcessAlarm(m *AlarmNotification) (*alert.PostAlertsOK, error) {
// NOTE(review): this freshly allocated definition is replaced by the map lookup just below.
82 alarmDef := &alarm.AlarmDefinition{}
84 if alarmDef, ok = alarm.RICAlarmDefinitions[m.Alarm.SpecificProblem]; !ok {
85 app.Logger.Warn("Alarm (SP='%d') not recognized, suppressing ...", m.Alarm.SpecificProblem)
// idx is the position of the matching entry in a.activeAlarms and is used only when found is true.
90 idx, found := a.IsMatchFound(m.Alarm)
91 // Suppress duplicate alarms
92 if found && m.AlarmAction == alarm.AlarmActionRaise {
93 app.Logger.Info("Duplicate alarm found, suppressing ...")
94 if m.PerceivedSeverity == a.activeAlarms[idx].PerceivedSeverity {
95 // Duplicate with same severity found
99 // Remove duplicate with different severity
100 a.activeAlarms = a.RemoveAlarm(a.activeAlarms, idx, "active")
104 // Clear alarm if found from active alarm list
105 if found && m.AlarmAction == alarm.AlarmActionClear {
106 return a.ProcessClearAlarm(m, alarmDef, idx)
109 // New alarm -> update active alarms and post to Alert Manager
110 if m.AlarmAction == alarm.AlarmActionRaise {
111 return a.ProcessRaiseAlarm(m, alarmDef)
// ProcessRaiseAlarm records a newly raised alarm m and forwards it.
//
// The alarm is given a newly generated id and appended to the active list. If alarmDef.RaiseDelay is
// positive, the call blocks in timerDelay for that many seconds and then looks the alarm up again.
// Afterwards the alarm is appended to the history list, the alarm state is written to the
// persistent volume, and the alarm is posted: through PostAlarm when the "controls.noma.enabled"
// setting is true, otherwise through PostAlert.
118 func (a *AlarmManager) ProcessRaiseAlarm(m *AlarmNotification, alarmDef *alarm.AlarmDefinition) (*alert.PostAlertsOK, error) {
119 app.Logger.Debug("Raise alarmDef.RaiseDelay = %v, AlarmNotification = %v", alarmDef.RaiseDelay, *m)
121 // RaiseDelay > 0 in an alarm object in active alarm table indicates that raise delay is still ongoing for the alarm
122 m.AlarmDefinition.RaiseDelay = alarmDef.RaiseDelay
123 a.UpdateAlarmFields(a.GenerateAlarmId(), m)
124 a.UpdateActiveAlarmList(m)
127 if alarmDef.RaiseDelay > 0 {
128 timerDelay(alarmDef.RaiseDelay)
130 // Alarm may have been deleted from active alarms table during delay or table index may have changed
131 idx, found := a.IsMatchFound(m.Alarm)
133 // Alarm is not showed in active alarms or alarm history via CLI before RaiseDelay has elapsed, i.e the value is 0
134 a.activeAlarms[idx].AlarmDefinition.RaiseDelay = 0
135 app.Logger.Debug("Raise after delay alarmDef.RaiseDelay = %v, AlarmNotification = %v", alarmDef.RaiseDelay, *m)
138 app.Logger.Debug("Alarm deleted during raise delay. AlarmNotification = %v", *m)
// The notification itself is also marked as no longer delayed before it enters the history.
144 m.AlarmDefinition.RaiseDelay = 0
145 a.UpdateAlarmHistoryList(m)
146 a.WriteAlarmInfoToPersistentVolume()
148 // Send alarm notification to NOMA, if enabled
149 if app.Config.GetBool("controls.noma.enabled") {
150 return a.PostAlarm(m)
152 return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive, m.AlarmTime))
// ProcessClearAlarm clears the active alarm at position idx that matches m.
//
// If alarmDef.ClearDelay is positive, the call blocks in timerDelay for that many seconds and the
// position is looked up again. The clear notification takes over the id of the active entry, is
// appended to the history, and the entry is removed from the active list. Clearing one of the two
// threshold alarms resets the corresponding "exceeded" flag. The alarm state is then written to the
// persistent volume, and when both a.postClear and the "controls.noma.enabled" setting are true the
// notification is posted through PostAlarm with severity alarm.SeverityCleared.
155 func (a *AlarmManager) ProcessClearAlarm(m *AlarmNotification, alarmDef *alarm.AlarmDefinition, idx int) (*alert.PostAlertsOK, error) {
156 app.Logger.Debug("Clear alarmDef.ClearDelay = %v, AlarmNotification = %v", alarmDef.ClearDelay, *m)
157 if alarmDef.ClearDelay > 0 {
159 timerDelay(alarmDef.ClearDelay)
160 app.Logger.Debug("Clear after delay alarmDef.ClearDelay = %v, AlarmNotification = %v", alarmDef.ClearDelay, *m)
162 // Another alarm clear may have happened during delay and active alarms table index changed
164 idx, found = a.IsMatchFound(m.Alarm)
170 a.UpdateAlarmFields(a.activeAlarms[idx].AlarmId, m)
// The history entry is appended directly here rather than through UpdateAlarmHistoryList.
171 a.alarmHistory = append(a.alarmHistory, *m)
172 a.activeAlarms = a.RemoveAlarm(a.activeAlarms, idx, "active")
173 if (len(a.alarmHistory) >= a.maxAlarmHistory) && (a.exceededAlarmHistoryOn == false) {
174 app.Logger.Warn("alarm history count exceeded maxAlarmHistory threshold")
// NOTE(review): unlike UpdateAlarmHistoryList, the result is not stored in exceededAlarmHistoryOn
// on this line — confirm the flag is set elsewhere, otherwise this alarm is generated on every clear.
175 a.GenerateThresholdAlarm(alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD, "history")
178 if a.exceededActiveAlarmOn && m.Alarm.SpecificProblem == alarm.ACTIVE_ALARM_EXCEED_MAX_THRESHOLD {
179 a.exceededActiveAlarmOn = false
182 if a.exceededAlarmHistoryOn && m.Alarm.SpecificProblem == alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD {
183 a.exceededAlarmHistoryOn = false
185 a.WriteAlarmInfoToPersistentVolume()
188 if a.postClear && app.Config.GetBool("controls.noma.enabled") {
189 m.PerceivedSeverity = alarm.SeverityCleared
190 return a.PostAlarm(m)
// timerDelay blocks the calling goroutine for delay seconds.
// A zero or negative delay returns immediately.
func timerDelay(delay int) {
	time.Sleep(time.Duration(delay) * time.Second)
}
// IsMatchFound scans a.activeAlarms for an entry describing the same alarm as newAlarm.
// Two alarms are considered the same when ManagedObjectId, ApplicationId, SpecificProblem and
// IdentifyingInfo are all equal; severity and additional info are not compared.
// Per the signature it yields an index and a found flag; callers in this file use the index only
// when the flag is true.
200 func (a *AlarmManager) IsMatchFound(newAlarm alarm.Alarm) (int, bool) {
201 for i, m := range a.activeAlarms {
202 if m.ManagedObjectId == newAlarm.ManagedObjectId && m.ApplicationId == newAlarm.ApplicationId &&
203 m.SpecificProblem == newAlarm.SpecificProblem && m.IdentifyingInfo == newAlarm.IdentifyingInfo {
210 func (a *AlarmManager) RemoveAlarm(alarms []AlarmNotification, i int, listName string) []AlarmNotification {
211 app.Logger.Info("Alarm '%+v' deleted from the '%s' list", alarms[i], listName)
212 copy(alarms[i:], alarms[i+1:])
213 return alarms[:len(alarms)-1]
216 func (a *AlarmManager) GenerateAlarmId() int {
217 a.uniqueAlarmId++ // @todo: generate a unique ID
218 return a.uniqueAlarmId
221 func (a *AlarmManager) UpdateAlarmFields(alarmId int, newAlarm *AlarmNotification) {
222 alarmDef := alarm.RICAlarmDefinitions[newAlarm.SpecificProblem]
223 newAlarm.AlarmId = alarmId
224 newAlarm.AlarmText = alarmDef.AlarmText
225 newAlarm.EventType = alarmDef.EventType
228 func (a *AlarmManager) GenerateThresholdAlarm(sp int, data string) bool {
229 thresholdAlarm := a.alarmClient.NewAlarm(sp, alarm.SeverityWarning, "threshold", data)
230 thresholdMessage := alarm.AlarmMessage{
231 Alarm: thresholdAlarm,
232 AlarmAction: alarm.AlarmActionRaise,
233 AlarmTime: (time.Now().UnixNano()),
235 alarmDef := alarm.RICAlarmDefinitions[sp]
236 alarmId := a.GenerateAlarmId()
237 alarmDef.AlarmId = alarmId
238 a.activeAlarms = append(a.activeAlarms, AlarmNotification{thresholdMessage, *alarmDef})
239 a.alarmHistory = append(a.alarmHistory, AlarmNotification{thresholdMessage, *alarmDef})
244 func (a *AlarmManager) UpdateActiveAlarmList(newAlarm *AlarmNotification) {
245 /* If maximum number of active alarms is reached, an error log writing is made, and new alarm indicating the problem is raised.
246 The attempt to raise the alarm next time will be suppressed when found as duplicate. */
247 if (len(a.activeAlarms) >= a.maxActiveAlarms) && (a.exceededActiveAlarmOn == false) {
248 app.Logger.Warn("active alarm count exceeded maxActiveAlarms threshold")
249 a.exceededActiveAlarmOn = a.GenerateThresholdAlarm(alarm.ACTIVE_ALARM_EXCEED_MAX_THRESHOLD, "active")
252 // @todo: For now just keep the active alarms in-memory. Use SDL later for persistence
253 a.activeAlarms = append(a.activeAlarms, *newAlarm)
256 func (a *AlarmManager) UpdateAlarmHistoryList(newAlarm *AlarmNotification) {
257 /* If maximum number of events in alarm history is reached, an error log writing is made,
258 and new alarm indicating the problem is raised. The attempt to add new event time will
261 if (len(a.alarmHistory) >= a.maxAlarmHistory) && (a.exceededAlarmHistoryOn == false) {
262 app.Logger.Warn("alarm history count exceeded maxAlarmHistory threshold")
263 a.exceededAlarmHistoryOn = a.GenerateThresholdAlarm(alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD, "history")
266 // @todo: For now just keep the alarms history in-memory. Use SDL later for persistence
267 a.alarmHistory = append(a.alarmHistory, *newAlarm)
270 func (a *AlarmManager) PostAlarm(m *AlarmNotification) (*alert.PostAlertsOK, error) {
271 result, err := json.Marshal(m)
273 app.Logger.Info("json.Marshal failed: %v", err)
277 fullUrl := fmt.Sprintf("%s/%s", app.Config.GetString("controls.noma.host"), app.Config.GetString("controls.noma.alarmUrl"))
278 app.Logger.Info("Posting alarm to '%s'", fullUrl)
280 resp, err := http.Post(fullUrl, "application/json", bytes.NewReader(result))
281 if err != nil || resp == nil {
282 app.Logger.Info("Unable to post alarm to '%s': %v", fullUrl, err)
288 func (a *AlarmManager) GenerateAlertLabels(newAlarm alarm.Alarm, status AlertStatus, alarmTime int64) (models.LabelSet, models.LabelSet) {
289 alarmDef := alarm.RICAlarmDefinitions[newAlarm.SpecificProblem]
290 amLabels := models.LabelSet{
291 "status": string(status),
292 "alertname": alarmDef.AlarmText,
293 "severity": string(newAlarm.PerceivedSeverity),
294 "service": fmt.Sprintf("%s/%s", newAlarm.ManagedObjectId, newAlarm.ApplicationId),
295 "system_name": "RIC",
297 amAnnotations := models.LabelSet{
298 "alarm_id": fmt.Sprintf("%d", alarmDef.AlarmId),
299 "specific_problem": fmt.Sprintf("%d", newAlarm.SpecificProblem),
300 "event_type": alarmDef.EventType,
301 "identifying_info": newAlarm.IdentifyingInfo,
302 "additional_info": newAlarm.AdditionalInfo,
303 "description": fmt.Sprintf("%s:%s", newAlarm.IdentifyingInfo, newAlarm.AdditionalInfo),
304 "instructions": alarmDef.OperationInstructions,
305 "timestamp": fmt.Sprintf("%s", time.Unix(0, alarmTime).Format("02/01/2006, 15:04:05")),
308 return amLabels, amAnnotations
311 func (a *AlarmManager) NewAlertmanagerClient() *client.Alertmanager {
312 cr := clientruntime.New(a.amHost, a.amBaseUrl, a.amSchemes)
313 return client.New(cr, strfmt.Default)
// PostAlert posts a single alert described by amLabels and amAnnotations to the alert manager.
// It builds one models.PostableAlert with an empty generator URL, wraps it in the request
// parameters, logs labels and annotations, and sends the request through a client obtained from
// NewAlertmanagerClient. A failed post is logged at error level together with host and base URL.
316 func (a *AlarmManager) PostAlert(amLabels, amAnnotations models.LabelSet) (*alert.PostAlertsOK, error) {
317 pa := &models.PostableAlert{
319 GeneratorURL: strfmt.URI(""),
322 Annotations: amAnnotations,
324 alertParams := alert.NewPostAlertsParams().WithAlerts(models.PostableAlerts{pa})
326 app.Logger.Info("Posting alerts: labels: %+v, annotations: %+v", amLabels, amAnnotations)
// A new client is constructed for every post.
327 ok, err := a.NewAlertmanagerClient().Alert.PostAlerts(alertParams)
329 app.Logger.Error("Posting alerts to '%s/%s' failed with error: %v", a.amHost, a.amBaseUrl, err)
// StatusCB is the status callback that Run registers through app.Resource.InjectStatusCb.
// It reports a boolean status and contains a log line for the case that RMR is not ready yet.
// NOTE(review): presumably the result mirrors a.rmrReady, which Run sets from the ready
// callback — confirm.
334 func (a *AlarmManager) StatusCB() bool {
336 app.Logger.Info("RMR not ready yet!")
// ConfigChangeCB is the configuration change listener that Run registers through
// app.AddConfigChangeListener. It re-reads four settings into the manager and logs the new values
// at debug level. configparam is not used by the statements shown here.
342 func (a *AlarmManager) ConfigChangeCB(configparam string) {
// The two alarm limits are read through app.Config, the alert manager settings through viper.
344 a.maxActiveAlarms = app.Config.GetInt("controls.maxActiveAlarms")
345 a.maxAlarmHistory = app.Config.GetInt("controls.maxAlarmHistory")
// NOTE(review): StartAlertTimer reads alertInterval when it creates its tick channel; confirm
// whether a value changed here is expected to affect an already running timer.
346 a.alertInterval = viper.GetInt("controls.promAlertManager.alertInterval")
347 a.amHost = viper.GetString("controls.promAlertManager.address")
349 app.Logger.Debug("ConfigChangeCB: maxActiveAlarms %v", a.maxActiveAlarms)
350 app.Logger.Debug("ConfigChangeCB: maxAlarmHistory = %v", a.maxAlarmHistory)
351 app.Logger.Debug("ConfigChangeCB: alertInterval %v", a.alertInterval)
352 app.Logger.Debug("ConfigChangeCB: amHost = %v", a.amHost)
357 func (a *AlarmManager) ReadAlarmDefinitionFromJson() {
359 filename := os.Getenv("DEF_FILE")
360 file, err := ioutil.ReadFile(filename)
362 data := RicAlarmDefinitions{}
363 err = json.Unmarshal([]byte(file), &data)
365 for _, alarmDefinition := range data.AlarmDefinitions {
366 _, exists := alarm.RICAlarmDefinitions[alarmDefinition.AlarmId]
368 app.Logger.Error("ReadAlarmDefinitionFromJson: alarm definition already exists for %v", alarmDefinition.AlarmId)
370 app.Logger.Debug("ReadAlarmDefinitionFromJson: alarm %v", alarmDefinition.AlarmId)
371 ricAlarmDefintion := new(alarm.AlarmDefinition)
372 ricAlarmDefintion.AlarmId = alarmDefinition.AlarmId
373 ricAlarmDefintion.AlarmText = alarmDefinition.AlarmText
374 ricAlarmDefintion.EventType = alarmDefinition.EventType
375 ricAlarmDefintion.OperationInstructions = alarmDefinition.OperationInstructions
376 ricAlarmDefintion.RaiseDelay = alarmDefinition.RaiseDelay
377 ricAlarmDefintion.ClearDelay = alarmDefinition.ClearDelay
378 alarm.RICAlarmDefinitions[alarmDefinition.AlarmId] = ricAlarmDefintion
382 app.Logger.Error("ReadAlarmDefinitionFromJson: json.Unmarshal failed with error %v", err)
385 app.Logger.Error("ReadAlarmDefinitionFromJson: ioutil.ReadFile failed with error %v", err)
389 func (a *AlarmManager) ReadAlarmInfoFromPersistentVolume() {
390 var alarmpersistentinfo AlarmPersistentInfo
391 byteValue, rerr := ioutil.ReadFile(a.alarmInfoPvFile)
393 app.Logger.Error("ararminfo.json file read error %v", rerr)
395 err := json.Unmarshal(byteValue, &alarmpersistentinfo)
397 app.Logger.Error("alarmpersistentinfo json unmarshal error %v", err)
399 a.uniqueAlarmId = alarmpersistentinfo.UniqueAlarmId
400 a.activeAlarms = make([]AlarmNotification, len(alarmpersistentinfo.ActiveAlarms))
401 a.alarmHistory = make([]AlarmNotification, len(alarmpersistentinfo.AlarmHistory))
402 copy(a.activeAlarms, alarmpersistentinfo.ActiveAlarms)
403 copy(a.alarmHistory, alarmpersistentinfo.AlarmHistory)
408 func (a *AlarmManager) WriteAlarmInfoToPersistentVolume() {
409 var alarmpersistentinfo AlarmPersistentInfo
410 alarmpersistentinfo.UniqueAlarmId = a.uniqueAlarmId
411 alarmpersistentinfo.ActiveAlarms = make([]AlarmNotification, len(a.activeAlarms))
412 alarmpersistentinfo.AlarmHistory = make([]AlarmNotification, len(a.alarmHistory))
413 copy(alarmpersistentinfo.ActiveAlarms, a.activeAlarms)
414 copy(alarmpersistentinfo.AlarmHistory, a.alarmHistory)
415 wdata, err := json.MarshalIndent(alarmpersistentinfo, "", " ")
417 app.Logger.Error("alarmpersistentinfo json marshal error %v", err)
419 werr := ioutil.WriteFile(a.alarmInfoPvFile, wdata, 0777)
421 app.Logger.Error("alarminfo.json file write error %v", werr)
426 func (a *AlarmManager) Run(sdlcheck bool) {
427 app.Logger.SetMdc("alarmManager", fmt.Sprintf("%s:%s", Version, Hash))
428 app.SetReadyCB(func(d interface{}) { a.rmrReady = true }, true)
429 app.Resource.InjectStatusCb(a.StatusCB)
430 app.AddConfigChangeListener(a.ConfigChangeCB)
432 alarm.RICAlarmDefinitions = make(map[int]*alarm.AlarmDefinition)
433 a.ReadAlarmDefinitionFromJson()
435 app.Resource.InjectRoute("/ric/v1/alarms", a.RaiseAlarm, "POST")
436 app.Resource.InjectRoute("/ric/v1/alarms", a.ClearAlarm, "DELETE")
437 app.Resource.InjectRoute("/ric/v1/alarms/active", a.GetActiveAlarms, "GET")
438 app.Resource.InjectRoute("/ric/v1/alarms/history", a.GetAlarmHistory, "GET")
439 app.Resource.InjectRoute("/ric/v1/alarms/config", a.SetAlarmConfig, "POST")
440 app.Resource.InjectRoute("/ric/v1/alarms/config", a.GetAlarmConfig, "GET")
441 app.Resource.InjectRoute("/ric/v1/alarms/define", a.SetAlarmDefinition, "POST")
442 app.Resource.InjectRoute("/ric/v1/alarms/define/{alarmId}", a.DeleteAlarmDefinition, "DELETE")
443 app.Resource.InjectRoute("/ric/v1/alarms/define", a.GetAlarmDefinition, "GET")
444 app.Resource.InjectRoute("/ric/v1/alarms/define/{alarmId}", a.GetAlarmDefinition, "GET")
446 // Start background timer for re-raising alerts
447 go a.StartAlertTimer()
448 a.alarmClient, _ = alarm.InitAlarm("SEP", "ALARMMANAGER")
450 a.ReadAlarmInfoFromPersistentVolume()
452 app.RunWithParams(a, sdlcheck)
// NewAlarmManager builds an AlarmManager.
//
// amHost is the alert manager address and alertInterval the re-raise period; an alertInterval of 0
// is replaced by the "controls.promAlertManager.alertInterval" setting, and the address can be
// taken from "controls.promAlertManager.address". clearAlarm is stored as postClear, which
// ProcessClearAlarm checks before posting clear notifications.
// The alarm limits, alert manager base URL and schemes, and the persistence file path are read from
// the application configuration; both alarm lists start empty and both threshold flags start false.
455 func NewAlarmManager(amHost string, alertInterval int, clearAlarm bool) *AlarmManager {
456 if alertInterval == 0 {
457 alertInterval = viper.GetInt("controls.promAlertManager.alertInterval")
// NOTE(review): presumably applied only when amHost is empty — confirm against the full file.
461 amHost = viper.GetString("controls.promAlertManager.address")
464 return &AlarmManager{
466 postClear: clearAlarm,
468 amBaseUrl: app.Config.GetString("controls.promAlertManager.baseUrl"),
// The schemes setting is used as a single list element.
469 amSchemes: []string{app.Config.GetString("controls.promAlertManager.schemes")},
470 alertInterval: alertInterval,
471 activeAlarms: make([]AlarmNotification, 0),
472 alarmHistory: make([]AlarmNotification, 0),
474 maxActiveAlarms: app.Config.GetInt("controls.maxActiveAlarms"),
475 maxAlarmHistory: app.Config.GetInt("controls.maxAlarmHistory"),
476 exceededActiveAlarmOn: false,
477 exceededAlarmHistoryOn: false,
478 alarmInfoPvFile: app.Config.GetString("controls.alarmInfoPvFile"),
// Creates an alarm manager with an empty host and a zero alert interval, with posting of clear
// notifications enabled, and runs it with sdlcheck set to true.
// NOTE(review): presumably the body of the program entry point — confirm against the full file.
484 NewAlarmManager("", 0, true).Run(true)