- FM performance test tool first version for review
[ric-plt/alarm-go.git] / manager / cmd / manager.go
index f3d3260..f00a812 100755 (executable)
@@ -23,8 +23,6 @@ package main
 import (
        "encoding/json"
        "fmt"
-       "time"
-        "os"
        "gerrit.o-ran-sc.org/r/ric-plt/alarm-go/alarm"
        app "gerrit.o-ran-sc.org/r/ric-plt/xapp-frame/pkg/xapp"
        clientruntime "github.com/go-openapi/runtime/client"
@@ -34,6 +32,8 @@ import (
        "github.com/prometheus/alertmanager/api/v2/models"
        "github.com/spf13/viper"
        "io/ioutil"
+       "os"
+       "time"
 )
 
 func (a *AlarmManager) StartAlertTimer() {
@@ -75,8 +75,10 @@ func (a *AlarmManager) HandleAlarms(rp *app.RMRParams) (*alert.PostAlertsOK, err
 }
 
 func (a *AlarmManager) ProcessAlarm(m *alarm.AlarmMessage) (*alert.PostAlertsOK, error) {
+       a.mutex.Lock()
        if _, ok := alarm.RICAlarmDefinitions[m.Alarm.SpecificProblem]; !ok {
                app.Logger.Warn("Alarm (SP='%d') not recognized, suppressing ...", m.Alarm.SpecificProblem)
+               a.mutex.Unlock()
                return nil, nil
        }
 
@@ -86,6 +88,7 @@ func (a *AlarmManager) ProcessAlarm(m *alarm.AlarmMessage) (*alert.PostAlertsOK,
                app.Logger.Info("Duplicate alarm found, suppressing ...")
                if m.PerceivedSeverity == a.activeAlarms[idx].PerceivedSeverity {
                        // Duplicate with same severity found
+                       a.mutex.Unlock()
                        return nil, nil
                } else {
                        // Remove duplicate with different severity
@@ -98,21 +101,37 @@ func (a *AlarmManager) ProcessAlarm(m *alarm.AlarmMessage) (*alert.PostAlertsOK,
                if found {
                        a.alarmHistory = append(a.alarmHistory, *m)
                        a.activeAlarms = a.RemoveAlarm(a.activeAlarms, idx, "active")
-
+                       if ((len(a.alarmHistory) >= a.maxAlarmHistory) && (a.exceededAlarmHistoryOn == false)){
+                               app.Logger.Error("alarm history count exceeded maxAlarmHistory threshold")
+                               histAlarm := a.alarmClient.NewAlarm(alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD, alarm.SeverityWarning, "threshold", "history")
+                               histAlarmMessage := alarm.AlarmMessage{Alarm: histAlarm, AlarmAction: alarm.AlarmActionRaise, AlarmTime: (time.Now().UnixNano())}
+                               a.activeAlarms = append(a.activeAlarms, histAlarmMessage)
+                               a.alarmHistory = append(a.alarmHistory, histAlarmMessage)
+                       }
+                       if ((a.exceededActiveAlarmOn == true) && (m.Alarm.SpecificProblem == alarm.ACTIVE_ALARM_EXCEED_MAX_THRESHOLD)) {
+                               a.exceededActiveAlarmOn = false
+                       }
+                       if ((a.exceededAlarmHistoryOn == true) && (m.Alarm.SpecificProblem == alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD)) {
+                               a.exceededAlarmHistoryOn = false
+                       }
                        if a.postClear {
+                               a.mutex.Unlock()
                                return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusResolved, m.AlarmTime))
                        }
                }
                app.Logger.Info("No matching active alarm found, suppressing ...")
+               a.mutex.Unlock()
                return nil, nil
        }
 
        // New alarm -> update active alarms and post to Alert Manager
        if m.AlarmAction == alarm.AlarmActionRaise {
                a.UpdateAlarmLists(m)
+               a.mutex.Unlock()
                return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive, m.AlarmTime))
        }
 
+       a.mutex.Unlock()
        return nil, nil
 }
 
@@ -127,34 +146,30 @@ func (a *AlarmManager) IsMatchFound(newAlarm alarm.Alarm) (int, bool) {
 }
 
 func (a *AlarmManager) RemoveAlarm(alarms []alarm.AlarmMessage, i int, listName string) []alarm.AlarmMessage {
-       a.mutex.Lock()
-       defer a.mutex.Unlock()
-
        app.Logger.Info("Alarm '%+v' deleted from the '%s' list", alarms[i], listName)
        copy(alarms[i:], alarms[i+1:])
        return alarms[:len(alarms)-1]
 }
 
 func (a *AlarmManager) UpdateAlarmLists(newAlarm *alarm.AlarmMessage) {
-       a.mutex.Lock()
-       defer a.mutex.Unlock()
-
        /* If maximum number of active alarms is reached, an error log writing is made, and new alarm indicating the problem is raised.
           The attempt to raise the alarm next time will be supressed when found as duplicate. */
-       if len(a.activeAlarms) >= a.maxActiveAlarms {
+       if ((len(a.activeAlarms) >= a.maxActiveAlarms) && (a.exceededActiveAlarmOn == false)) {
                app.Logger.Error("active alarm count exceeded maxActiveAlarms threshold")
-               actAlarm := a.alarmClient.NewAlarm(alarm.ACTIVE_ALARM_EXCEED_MAX_THRESHOLD, alarm.SeverityWarning, "clear alarms or raise threshold", "active alarms full")
+               actAlarm := a.alarmClient.NewAlarm(alarm.ACTIVE_ALARM_EXCEED_MAX_THRESHOLD, alarm.SeverityWarning, "threshold", "active")
                actAlarmMessage := alarm.AlarmMessage{Alarm: actAlarm, AlarmAction: alarm.AlarmActionRaise, AlarmTime: (time.Now().UnixNano())}
                a.activeAlarms = append(a.activeAlarms, actAlarmMessage)
                a.alarmHistory = append(a.alarmHistory, actAlarmMessage)
+               a.exceededActiveAlarmOn = true
        }
 
-       if len(a.alarmHistory) >= a.maxAlarmHistory {
+       if ((len(a.alarmHistory) >= a.maxAlarmHistory) && (a.exceededAlarmHistoryOn == false)) {
                app.Logger.Error("alarm history count exceeded maxAlarmHistory threshold")
-               histAlarm := a.alarmClient.NewAlarm(alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD, alarm.SeverityWarning, "clear alarms or raise threshold", "alarm history full")
+               histAlarm := a.alarmClient.NewAlarm(alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD, alarm.SeverityWarning, "threshold", "history")
                histAlarmMessage := alarm.AlarmMessage{Alarm: histAlarm, AlarmAction: alarm.AlarmActionRaise, AlarmTime: (time.Now().UnixNano())}
                a.activeAlarms = append(a.activeAlarms, histAlarmMessage)
                a.alarmHistory = append(a.alarmHistory, histAlarmMessage)
+               a.exceededAlarmHistoryOn = true
        }
 
        // @todo: For now just keep the alarms (both active and history) in-memory. Use SDL later for persistence
@@ -252,10 +267,10 @@ func (a *AlarmManager) ReadAlarmDefinitionFromJson() {
                                }
                        }
                } else {
-                       app.Logger.Error("json.Unmarshal failed with error %v", err)
+                       app.Logger.Error("ReadAlarmDefinitionFromJson: json.Unmarshal failed with error %v", err)
                }
        } else {
-               app.Logger.Error("ioutil.ReadFile failed with error %v", err)
+               app.Logger.Error("ReadAlarmDefinitionFromJson: ioutil.ReadFile failed with error %v", err)
        }
 }
 
@@ -306,6 +321,8 @@ func NewAlarmManager(amHost string, alertInterval int) *AlarmManager {
                alarmHistory:    make([]alarm.AlarmMessage, 0),
                maxActiveAlarms: app.Config.GetInt("controls.maxActiveAlarms"),
                maxAlarmHistory: app.Config.GetInt("controls.maxAlarmHistory"),
+               exceededActiveAlarmOn:  false,
+               exceededAlarmHistoryOn: false,
        }
 }