- FM performance test tool first version for review
[ric-plt/alarm-go.git] / manager / cmd / manager.go
index 59beeac..f00a812 100755 (executable)
@@ -23,17 +23,17 @@ package main
 import (
        "encoding/json"
        "fmt"
-       "time"
-
+       "gerrit.o-ran-sc.org/r/ric-plt/alarm-go/alarm"
+       app "gerrit.o-ran-sc.org/r/ric-plt/xapp-frame/pkg/xapp"
        clientruntime "github.com/go-openapi/runtime/client"
        "github.com/go-openapi/strfmt"
        "github.com/prometheus/alertmanager/api/v2/client"
        "github.com/prometheus/alertmanager/api/v2/client/alert"
        "github.com/prometheus/alertmanager/api/v2/models"
        "github.com/spf13/viper"
-
-       "gerrit.o-ran-sc.org/r/ric-plt/alarm-go/alarm"
-       app "gerrit.o-ran-sc.org/r/ric-plt/xapp-frame/pkg/xapp"
+       "io/ioutil"
+       "os"
+       "time"
 )
 
 func (a *AlarmManager) StartAlertTimer() {
@@ -75,16 +75,25 @@ func (a *AlarmManager) HandleAlarms(rp *app.RMRParams) (*alert.PostAlertsOK, err
 }
 
 func (a *AlarmManager) ProcessAlarm(m *alarm.AlarmMessage) (*alert.PostAlertsOK, error) {
+       a.mutex.Lock()
        if _, ok := alarm.RICAlarmDefinitions[m.Alarm.SpecificProblem]; !ok {
                app.Logger.Warn("Alarm (SP='%d') not recognized, suppressing ...", m.Alarm.SpecificProblem)
+               a.mutex.Unlock()
                return nil, nil
        }
 
        // Suppress duplicate alarms
        idx, found := a.IsMatchFound(m.Alarm)
-       if found && m.AlarmAction != alarm.AlarmActionClear {
+       if found && m.AlarmAction == alarm.AlarmActionRaise {
                app.Logger.Info("Duplicate alarm found, suppressing ...")
-               return nil, nil
+               if m.PerceivedSeverity == a.activeAlarms[idx].PerceivedSeverity {
+                       // Duplicate with same severity found
+                       a.mutex.Unlock()
+                       return nil, nil
+               } else {
+                       // Remove duplicate with different severity
+                       a.activeAlarms = a.RemoveAlarm(a.activeAlarms, idx, "active")
+               }
        }
 
        // Clear alarm if found from active alarm list
@@ -92,21 +101,37 @@ func (a *AlarmManager) ProcessAlarm(m *alarm.AlarmMessage) (*alert.PostAlertsOK,
                if found {
                        a.alarmHistory = append(a.alarmHistory, *m)
                        a.activeAlarms = a.RemoveAlarm(a.activeAlarms, idx, "active")
-
+                       if ((len(a.alarmHistory) >= a.maxAlarmHistory) && (a.exceededAlarmHistoryOn == false)){
+                               app.Logger.Error("alarm history count exceeded maxAlarmHistory threshold")
+                               histAlarm := a.alarmClient.NewAlarm(alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD, alarm.SeverityWarning, "threshold", "history")
+                               histAlarmMessage := alarm.AlarmMessage{Alarm: histAlarm, AlarmAction: alarm.AlarmActionRaise, AlarmTime: (time.Now().UnixNano())}
+                               a.activeAlarms = append(a.activeAlarms, histAlarmMessage)
+                               a.alarmHistory = append(a.alarmHistory, histAlarmMessage)
+                       }
+                       if ((a.exceededActiveAlarmOn == true) && (m.Alarm.SpecificProblem == alarm.ACTIVE_ALARM_EXCEED_MAX_THRESHOLD)) {
+                               a.exceededActiveAlarmOn = false
+                       }
+                       if ((a.exceededAlarmHistoryOn == true) && (m.Alarm.SpecificProblem == alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD)) {
+                               a.exceededAlarmHistoryOn = false
+                       }
                        if a.postClear {
+                               a.mutex.Unlock()
                                return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusResolved, m.AlarmTime))
                        }
                }
                app.Logger.Info("No matching active alarm found, suppressing ...")
+               a.mutex.Unlock()
                return nil, nil
        }
 
        // New alarm -> update active alarms and post to Alert Manager
        if m.AlarmAction == alarm.AlarmActionRaise {
                a.UpdateAlarmLists(m)
+               a.mutex.Unlock()
                return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive, m.AlarmTime))
        }
 
+       a.mutex.Unlock()
        return nil, nil
 }
 
@@ -121,34 +146,30 @@ func (a *AlarmManager) IsMatchFound(newAlarm alarm.Alarm) (int, bool) {
 }
 
 func (a *AlarmManager) RemoveAlarm(alarms []alarm.AlarmMessage, i int, listName string) []alarm.AlarmMessage {
-       a.mutex.Lock()
-       defer a.mutex.Unlock()
-
        app.Logger.Info("Alarm '%+v' deleted from the '%s' list", alarms[i], listName)
        copy(alarms[i:], alarms[i+1:])
        return alarms[:len(alarms)-1]
 }
 
 func (a *AlarmManager) UpdateAlarmLists(newAlarm *alarm.AlarmMessage) {
-       a.mutex.Lock()
-       defer a.mutex.Unlock()
-
        /* If maximum number of active alarms is reached, an error log writing is made, and new alarm indicating the problem is raised.
           The attempt to raise the alarm next time will be supressed when found as duplicate. */
-       if len(a.activeAlarms) >= a.maxActiveAlarms {
+       if ((len(a.activeAlarms) >= a.maxActiveAlarms) && (a.exceededActiveAlarmOn == false)) {
                app.Logger.Error("active alarm count exceeded maxActiveAlarms threshold")
-               actAlarm := a.alarmClient.NewAlarm(alarm.ACTIVE_ALARM_EXCEED_MAX_THRESHOLD, alarm.SeverityWarning, "clear alarms or raise threshold", "active alarms full")
+               actAlarm := a.alarmClient.NewAlarm(alarm.ACTIVE_ALARM_EXCEED_MAX_THRESHOLD, alarm.SeverityWarning, "threshold", "active")
                actAlarmMessage := alarm.AlarmMessage{Alarm: actAlarm, AlarmAction: alarm.AlarmActionRaise, AlarmTime: (time.Now().UnixNano())}
                a.activeAlarms = append(a.activeAlarms, actAlarmMessage)
                a.alarmHistory = append(a.alarmHistory, actAlarmMessage)
+               a.exceededActiveAlarmOn = true
        }
 
-       if len(a.alarmHistory) >= a.maxAlarmHistory {
+       if ((len(a.alarmHistory) >= a.maxAlarmHistory) && (a.exceededAlarmHistoryOn == false)) {
                app.Logger.Error("alarm history count exceeded maxAlarmHistory threshold")
-               histAlarm := a.alarmClient.NewAlarm(alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD, alarm.SeverityWarning, "clear alarms or raise threshold", "alarm history full")
+               histAlarm := a.alarmClient.NewAlarm(alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD, alarm.SeverityWarning, "threshold", "history")
                histAlarmMessage := alarm.AlarmMessage{Alarm: histAlarm, AlarmAction: alarm.AlarmActionRaise, AlarmTime: (time.Now().UnixNano())}
                a.activeAlarms = append(a.activeAlarms, histAlarmMessage)
                a.alarmHistory = append(a.alarmHistory, histAlarmMessage)
+               a.exceededAlarmHistoryOn = true
        }
 
        // @todo: For now just keep the alarms (both active and history) in-memory. Use SDL later for persistence
@@ -223,18 +244,55 @@ func (a *AlarmManager) ConfigChangeCB(configparam string) {
        return
 }
 
+func (a *AlarmManager) ReadAlarmDefinitionFromJson() {
+
+       filename := os.Getenv("DEF_FILE")
+       file, err := ioutil.ReadFile(filename)
+       if err == nil {
+               data := RicAlarmDefinitions{}
+               err = json.Unmarshal([]byte(file), &data)
+               if err == nil {
+                       for _, alarmDefinition := range data.AlarmDefinitions {
+                               _, exists := alarm.RICAlarmDefinitions[alarmDefinition.AlarmId]
+                               if exists {
+                                       app.Logger.Error("ReadAlarmDefinitionFromJson: alarm definition already exists for %v", alarmDefinition.AlarmId)
+                               } else {
+                                       app.Logger.Debug("ReadAlarmDefinitionFromJson: alarm  %v", alarmDefinition.AlarmId)
+                                       ricAlarmDefintion := new(alarm.AlarmDefinition)
+                                       ricAlarmDefintion.AlarmId = alarmDefinition.AlarmId
+                                       ricAlarmDefintion.AlarmText = alarmDefinition.AlarmText
+                                       ricAlarmDefintion.EventType = alarmDefinition.EventType
+                                       ricAlarmDefintion.OperationInstructions = alarmDefinition.OperationInstructions
+                                       alarm.RICAlarmDefinitions[alarmDefinition.AlarmId] = ricAlarmDefintion
+                               }
+                       }
+               } else {
+                       app.Logger.Error("ReadAlarmDefinitionFromJson: json.Unmarshal failed with error %v", err)
+               }
+       } else {
+               app.Logger.Error("ReadAlarmDefinitionFromJson: ioutil.ReadFile failed with error %v", err)
+       }
+}
+
 func (a *AlarmManager) Run(sdlcheck bool) {
        app.Logger.SetMdc("alarmManager", fmt.Sprintf("%s:%s", Version, Hash))
        app.SetReadyCB(func(d interface{}) { a.rmrReady = true }, true)
        app.Resource.InjectStatusCb(a.StatusCB)
        app.AddConfigChangeListener(a.ConfigChangeCB)
 
+       alarm.RICAlarmDefinitions = make(map[int]*alarm.AlarmDefinition)
+       a.ReadAlarmDefinitionFromJson()
+
        app.Resource.InjectRoute("/ric/v1/alarms", a.RaiseAlarm, "POST")
        app.Resource.InjectRoute("/ric/v1/alarms", a.ClearAlarm, "DELETE")
        app.Resource.InjectRoute("/ric/v1/alarms/active", a.GetActiveAlarms, "GET")
        app.Resource.InjectRoute("/ric/v1/alarms/history", a.GetAlarmHistory, "GET")
        app.Resource.InjectRoute("/ric/v1/alarms/config", a.SetAlarmConfig, "POST")
        app.Resource.InjectRoute("/ric/v1/alarms/config", a.GetAlarmConfig, "GET")
+       app.Resource.InjectRoute("/ric/v1/alarms/define", a.SetAlarmDefinition, "POST")
+       app.Resource.InjectRoute("/ric/v1/alarms/define/{alarmId}", a.DeleteAlarmDefinition, "DELETE")
+       app.Resource.InjectRoute("/ric/v1/alarms/define", a.GetAlarmDefinition, "GET")
+       app.Resource.InjectRoute("/ric/v1/alarms/define/{alarmId}", a.GetAlarmDefinition, "GET")
 
        // Start background timer for re-raising alerts
        a.postClear = sdlcheck
@@ -263,6 +321,8 @@ func NewAlarmManager(amHost string, alertInterval int) *AlarmManager {
                alarmHistory:    make([]alarm.AlarmMessage, 0),
                maxActiveAlarms: app.Config.GetInt("controls.maxActiveAlarms"),
                maxAlarmHistory: app.Config.GetInt("controls.maxAlarmHistory"),
+               exceededActiveAlarmOn:  false,
+               exceededAlarmHistoryOn: false,
        }
 }