- FM performance test tool first version for review
[ric-plt/alarm-go.git] / manager / cmd / manager.go
index d74c62d..f00a812 100755 (executable)
@@ -23,17 +23,17 @@ package main
 import (
        "encoding/json"
        "fmt"
-       "time"
-
+       "gerrit.o-ran-sc.org/r/ric-plt/alarm-go/alarm"
+       app "gerrit.o-ran-sc.org/r/ric-plt/xapp-frame/pkg/xapp"
        clientruntime "github.com/go-openapi/runtime/client"
        "github.com/go-openapi/strfmt"
        "github.com/prometheus/alertmanager/api/v2/client"
        "github.com/prometheus/alertmanager/api/v2/client/alert"
        "github.com/prometheus/alertmanager/api/v2/models"
        "github.com/spf13/viper"
-
-       "gerrit.o-ran-sc.org/r/ric-plt/alarm-go/alarm"
-       app "gerrit.o-ran-sc.org/r/ric-plt/xapp-frame/pkg/xapp"
+       "io/ioutil"
+       "os"
+       "time"
 )
 
 func (a *AlarmManager) StartAlertTimer() {
@@ -42,7 +42,7 @@ func (a *AlarmManager) StartAlertTimer() {
                a.mutex.Lock()
                for _, m := range a.activeAlarms {
                        app.Logger.Info("Re-raising alarm: %v", m)
-                       a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive))
+                       a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive, m.AlarmTime))
                }
                a.mutex.Unlock()
        }
@@ -75,37 +75,63 @@ func (a *AlarmManager) HandleAlarms(rp *app.RMRParams) (*alert.PostAlertsOK, err
 }
 
 func (a *AlarmManager) ProcessAlarm(m *alarm.AlarmMessage) (*alert.PostAlertsOK, error) {
+       a.mutex.Lock()
        if _, ok := alarm.RICAlarmDefinitions[m.Alarm.SpecificProblem]; !ok {
                app.Logger.Warn("Alarm (SP='%d') not recognized, suppressing ...", m.Alarm.SpecificProblem)
+               a.mutex.Unlock()
                return nil, nil
        }
 
        // Suppress duplicate alarms
        idx, found := a.IsMatchFound(m.Alarm)
-       if found && m.AlarmAction != alarm.AlarmActionClear {
+       if found && m.AlarmAction == alarm.AlarmActionRaise {
                app.Logger.Info("Duplicate alarm found, suppressing ...")
-               return nil, nil
+               if m.PerceivedSeverity == a.activeAlarms[idx].PerceivedSeverity {
+                       // Duplicate with same severity found
+                       a.mutex.Unlock()
+                       return nil, nil
+               } else {
+                       // Remove duplicate with different severity
+                       a.activeAlarms = a.RemoveAlarm(a.activeAlarms, idx, "active")
+               }
        }
 
        // Clear alarm if found from active alarm list
        if m.AlarmAction == alarm.AlarmActionClear {
                if found {
+                       a.alarmHistory = append(a.alarmHistory, *m)
                        a.activeAlarms = a.RemoveAlarm(a.activeAlarms, idx, "active")
-
+                       if ((len(a.alarmHistory) >= a.maxAlarmHistory) && (a.exceededAlarmHistoryOn == false)){
+                               app.Logger.Error("alarm history count exceeded maxAlarmHistory threshold")
+                               histAlarm := a.alarmClient.NewAlarm(alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD, alarm.SeverityWarning, "threshold", "history")
+                               histAlarmMessage := alarm.AlarmMessage{Alarm: histAlarm, AlarmAction: alarm.AlarmActionRaise, AlarmTime: (time.Now().UnixNano())}
+                               a.activeAlarms = append(a.activeAlarms, histAlarmMessage)
+                               a.alarmHistory = append(a.alarmHistory, histAlarmMessage)
+                       }
+                       if ((a.exceededActiveAlarmOn == true) && (m.Alarm.SpecificProblem == alarm.ACTIVE_ALARM_EXCEED_MAX_THRESHOLD)) {
+                               a.exceededActiveAlarmOn = false
+                       }
+                       if ((a.exceededAlarmHistoryOn == true) && (m.Alarm.SpecificProblem == alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD)) {
+                               a.exceededAlarmHistoryOn = false
+                       }
                        if a.postClear {
-                               return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusResolved))
+                               a.mutex.Unlock()
+                               return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusResolved, m.AlarmTime))
                        }
                }
                app.Logger.Info("No matching active alarm found, suppressing ...")
+               a.mutex.Unlock()
                return nil, nil
        }
 
        // New alarm -> update active alarms and post to Alert Manager
        if m.AlarmAction == alarm.AlarmActionRaise {
                a.UpdateAlarmLists(m)
-               return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive))
+               a.mutex.Unlock()
+               return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive, m.AlarmTime))
        }
 
+       a.mutex.Unlock()
        return nil, nil
 }
 
@@ -120,25 +146,30 @@ func (a *AlarmManager) IsMatchFound(newAlarm alarm.Alarm) (int, bool) {
 }
 
 // RemoveAlarm deletes the element at index i from alarms and returns the
 // slice shortened by one. The removal is done in place: the tail is shifted
 // left with copy, so the returned slice shares its backing array with the
 // argument. i must be a valid index; alarms[i] panics otherwise. listName is
 // only used in the log message.
 //
 // The method itself takes no lock; ProcessAlarm calls it with a.mutex held.
 func (a *AlarmManager) RemoveAlarm(alarms []alarm.AlarmMessage, i int, listName string) []alarm.AlarmMessage {
-       a.mutex.Lock()
-       defer a.mutex.Unlock()
-
        // Log the entry first; the shift below overwrites it.
        app.Logger.Info("Alarm '%+v' deleted from the '%s' list", alarms[i], listName)
        // Move every element after i one slot to the left, then drop the last slot.
        copy(alarms[i:], alarms[i+1:])
        return alarms[:len(alarms)-1]
 }
 
 func (a *AlarmManager) UpdateAlarmLists(newAlarm *alarm.AlarmMessage) {
-       a.mutex.Lock()
-       defer a.mutex.Unlock()
-
-       // If maximum number of active alarms is reached, purge the oldest alarm
-       if len(a.activeAlarms) >= viper.GetInt("controls.maxActiveAlarms") {
-               a.activeAlarms = a.RemoveAlarm(a.activeAlarms, 0, "active")
+       /* If the maximum number of active alarms is reached, an error is logged and a new alarm indicating the problem is raised.
+          The next attempt to raise that alarm will be suppressed when it is found to be a duplicate. */
+       if ((len(a.activeAlarms) >= a.maxActiveAlarms) && (a.exceededActiveAlarmOn == false)) {
+               app.Logger.Error("active alarm count exceeded maxActiveAlarms threshold")
+               actAlarm := a.alarmClient.NewAlarm(alarm.ACTIVE_ALARM_EXCEED_MAX_THRESHOLD, alarm.SeverityWarning, "threshold", "active")
+               actAlarmMessage := alarm.AlarmMessage{Alarm: actAlarm, AlarmAction: alarm.AlarmActionRaise, AlarmTime: (time.Now().UnixNano())}
+               a.activeAlarms = append(a.activeAlarms, actAlarmMessage)
+               a.alarmHistory = append(a.alarmHistory, actAlarmMessage)
+               a.exceededActiveAlarmOn = true
        }
 
-       if len(a.alarmHistory) >= viper.GetInt("controls.maxAlarmHistory") {
-               a.alarmHistory = a.RemoveAlarm(a.alarmHistory, 0, "history")
+       if ((len(a.alarmHistory) >= a.maxAlarmHistory) && (a.exceededAlarmHistoryOn == false)) {
+               app.Logger.Error("alarm history count exceeded maxAlarmHistory threshold")
+               histAlarm := a.alarmClient.NewAlarm(alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD, alarm.SeverityWarning, "threshold", "history")
+               histAlarmMessage := alarm.AlarmMessage{Alarm: histAlarm, AlarmAction: alarm.AlarmActionRaise, AlarmTime: (time.Now().UnixNano())}
+               a.activeAlarms = append(a.activeAlarms, histAlarmMessage)
+               a.alarmHistory = append(a.alarmHistory, histAlarmMessage)
+               a.exceededAlarmHistoryOn = true
        }
 
        // @todo: For now just keep the alarms (both active and history) in-memory. Use SDL later for persistence
@@ -146,7 +177,7 @@ func (a *AlarmManager) UpdateAlarmLists(newAlarm *alarm.AlarmMessage) {
        a.alarmHistory = append(a.alarmHistory, *newAlarm)
 }
 
-func (a *AlarmManager) GenerateAlertLabels(newAlarm alarm.Alarm, status AlertStatus) (models.LabelSet, models.LabelSet) {
+func (a *AlarmManager) GenerateAlertLabels(newAlarm alarm.Alarm, status AlertStatus, alarmTime int64) (models.LabelSet, models.LabelSet) {
        alarmDef := alarm.RICAlarmDefinitions[newAlarm.SpecificProblem]
        amLabels := models.LabelSet{
                "status":      string(status),
@@ -161,6 +192,7 @@ func (a *AlarmManager) GenerateAlertLabels(newAlarm alarm.Alarm, status AlertSta
                "additional_info": newAlarm.AdditionalInfo,
                "summary":         alarmDef.EventType,
                "instructions":    alarmDef.OperationInstructions,
+               "timestamp":       fmt.Sprintf("%s", time.Unix(0, alarmTime).Format("02/01/2006, 15:04:05")),
        }
 
        return amLabels, amAnnotations
@@ -197,19 +229,75 @@ func (a *AlarmManager) StatusCB() bool {
        return a.rmrReady
 }
 
+// ConfigChangeCB re-reads the alarm limits and the Alert Manager settings from
+// configuration, stores them on the manager and logs each new value at debug
+// level. Run registers it through app.AddConfigChangeListener. The configparam
+// argument is not used.
+func (a *AlarmManager) ConfigChangeCB(configparam string) {
+       // Alarm list limits.
+       a.maxActiveAlarms = app.Config.GetInt("controls.maxActiveAlarms")
+       app.Logger.Debug("ConfigChangeCB: maxActiveAlarms %v", a.maxActiveAlarms)
+       a.maxAlarmHistory = app.Config.GetInt("controls.maxAlarmHistory")
+       app.Logger.Debug("ConfigChangeCB: maxAlarmHistory = %v", a.maxAlarmHistory)
+
+       // Alert Manager settings.
+       a.alertInterval = viper.GetInt("controls.promAlertManager.alertInterval")
+       app.Logger.Debug("ConfigChangeCB: alertInterval %v", a.alertInterval)
+       a.amHost = viper.GetString("controls.promAlertManager.address")
+       app.Logger.Debug("ConfigChangeCB: amHost = %v", a.amHost)
+}
+
+// ReadAlarmDefinitionFromJson loads alarm definitions from the JSON file named
+// by the DEF_FILE environment variable and adds them to
+// alarm.RICAlarmDefinitions.
+//
+// A definition whose AlarmId is already present in the map is left untouched
+// and reported as an error. Read and parse failures are logged; nothing is
+// returned to the caller.
+func (a *AlarmManager) ReadAlarmDefinitionFromJson() {
+       path := os.Getenv("DEF_FILE")
+       raw, err := ioutil.ReadFile(path)
+       if err != nil {
+               app.Logger.Error("ReadAlarmDefinitionFromJson: ioutil.ReadFile failed with error %v", err)
+               return
+       }
+
+       var defs RicAlarmDefinitions
+       if err := json.Unmarshal(raw, &defs); err != nil {
+               app.Logger.Error("ReadAlarmDefinitionFromJson: json.Unmarshal failed with error %v", err)
+               return
+       }
+
+       for _, def := range defs.AlarmDefinitions {
+               // Never overwrite a definition that is already registered.
+               if _, known := alarm.RICAlarmDefinitions[def.AlarmId]; known {
+                       app.Logger.Error("ReadAlarmDefinitionFromJson: alarm definition already exists for %v", def.AlarmId)
+                       continue
+               }
+
+               app.Logger.Debug("ReadAlarmDefinitionFromJson: alarm  %v", def.AlarmId)
+               alarm.RICAlarmDefinitions[def.AlarmId] = &alarm.AlarmDefinition{
+                       AlarmId:               def.AlarmId,
+                       AlarmText:             def.AlarmText,
+                       EventType:             def.EventType,
+                       OperationInstructions: def.OperationInstructions,
+               }
+       }
+}
+
 func (a *AlarmManager) Run(sdlcheck bool) {
        app.Logger.SetMdc("alarmManager", fmt.Sprintf("%s:%s", Version, Hash))
        app.SetReadyCB(func(d interface{}) { a.rmrReady = true }, true)
        app.Resource.InjectStatusCb(a.StatusCB)
+       app.AddConfigChangeListener(a.ConfigChangeCB)
+
+       alarm.RICAlarmDefinitions = make(map[int]*alarm.AlarmDefinition)
+       a.ReadAlarmDefinitionFromJson()
 
        app.Resource.InjectRoute("/ric/v1/alarms", a.RaiseAlarm, "POST")
        app.Resource.InjectRoute("/ric/v1/alarms", a.ClearAlarm, "DELETE")
        app.Resource.InjectRoute("/ric/v1/alarms/active", a.GetActiveAlarms, "GET")
        app.Resource.InjectRoute("/ric/v1/alarms/history", a.GetAlarmHistory, "GET")
+       app.Resource.InjectRoute("/ric/v1/alarms/config", a.SetAlarmConfig, "POST")
+       app.Resource.InjectRoute("/ric/v1/alarms/config", a.GetAlarmConfig, "GET")
+       app.Resource.InjectRoute("/ric/v1/alarms/define", a.SetAlarmDefinition, "POST")
+       app.Resource.InjectRoute("/ric/v1/alarms/define/{alarmId}", a.DeleteAlarmDefinition, "DELETE")
+       app.Resource.InjectRoute("/ric/v1/alarms/define", a.GetAlarmDefinition, "GET")
+       app.Resource.InjectRoute("/ric/v1/alarms/define/{alarmId}", a.GetAlarmDefinition, "GET")
 
        // Start background timer for re-raising alerts
        a.postClear = sdlcheck
        go a.StartAlertTimer()
+       a.alarmClient, _ = alarm.InitAlarm("SEP", "ALARMMANAGER")
 
        app.RunWithParams(a, sdlcheck)
 }
@@ -224,13 +312,17 @@ func NewAlarmManager(amHost string, alertInterval int) *AlarmManager {
        }
 
        return &AlarmManager{
-               rmrReady:      false,
-               amHost:        amHost,
-               amBaseUrl:     viper.GetString("controls.promAlertManager.baseUrl"),
-               amSchemes:     []string{viper.GetString("controls.promAlertManager.schemes")},
-               alertInterval: alertInterval,
-               activeAlarms:  make([]alarm.AlarmMessage, 0),
-               alarmHistory:  make([]alarm.AlarmMessage, 0),
+               rmrReady:        false,
+               amHost:          amHost,
+               amBaseUrl:       viper.GetString("controls.promAlertManager.baseUrl"),
+               amSchemes:       []string{viper.GetString("controls.promAlertManager.schemes")},
+               alertInterval:   alertInterval,
+               activeAlarms:    make([]alarm.AlarmMessage, 0),
+               alarmHistory:    make([]alarm.AlarmMessage, 0),
+               maxActiveAlarms: app.Config.GetInt("controls.maxActiveAlarms"),
+               maxAlarmHistory: app.Config.GetInt("controls.maxAlarmHistory"),
+               exceededActiveAlarmOn:  false,
+               exceededAlarmHistoryOn: false,
        }
 }