- FM performance test tool first version for review
[ric-plt/alarm-go.git] / manager / cmd / manager.go
index fb30ba7..f00a812 100755 (executable)
@@ -23,17 +23,17 @@ package main
 import (
        "encoding/json"
        "fmt"
 import (
        "encoding/json"
        "fmt"
-       "time"
-
+       "gerrit.o-ran-sc.org/r/ric-plt/alarm-go/alarm"
+       app "gerrit.o-ran-sc.org/r/ric-plt/xapp-frame/pkg/xapp"
        clientruntime "github.com/go-openapi/runtime/client"
        "github.com/go-openapi/strfmt"
        "github.com/prometheus/alertmanager/api/v2/client"
        "github.com/prometheus/alertmanager/api/v2/client/alert"
        "github.com/prometheus/alertmanager/api/v2/models"
        "github.com/spf13/viper"
        clientruntime "github.com/go-openapi/runtime/client"
        "github.com/go-openapi/strfmt"
        "github.com/prometheus/alertmanager/api/v2/client"
        "github.com/prometheus/alertmanager/api/v2/client/alert"
        "github.com/prometheus/alertmanager/api/v2/models"
        "github.com/spf13/viper"
-
-       "gerrit.o-ran-sc.org/r/ric-plt/alarm-go/alarm"
-       app "gerrit.o-ran-sc.org/r/ric-plt/xapp-frame/pkg/xapp"
+       "io/ioutil"
+       "os"
+       "time"
 )
 
 func (a *AlarmManager) StartAlertTimer() {
 )
 
 func (a *AlarmManager) StartAlertTimer() {
@@ -75,17 +75,20 @@ func (a *AlarmManager) HandleAlarms(rp *app.RMRParams) (*alert.PostAlertsOK, err
 }
 
 func (a *AlarmManager) ProcessAlarm(m *alarm.AlarmMessage) (*alert.PostAlertsOK, error) {
 }
 
 func (a *AlarmManager) ProcessAlarm(m *alarm.AlarmMessage) (*alert.PostAlertsOK, error) {
+       a.mutex.Lock()
        if _, ok := alarm.RICAlarmDefinitions[m.Alarm.SpecificProblem]; !ok {
                app.Logger.Warn("Alarm (SP='%d') not recognized, suppressing ...", m.Alarm.SpecificProblem)
        if _, ok := alarm.RICAlarmDefinitions[m.Alarm.SpecificProblem]; !ok {
                app.Logger.Warn("Alarm (SP='%d') not recognized, suppressing ...", m.Alarm.SpecificProblem)
+               a.mutex.Unlock()
                return nil, nil
        }
 
        // Suppress duplicate alarms
        idx, found := a.IsMatchFound(m.Alarm)
                return nil, nil
        }
 
        // Suppress duplicate alarms
        idx, found := a.IsMatchFound(m.Alarm)
-       if found && m.AlarmAction == alarm.AlarmActionRaise  {
+       if found && m.AlarmAction == alarm.AlarmActionRaise {
                app.Logger.Info("Duplicate alarm found, suppressing ...")
                if m.PerceivedSeverity == a.activeAlarms[idx].PerceivedSeverity {
                        // Duplicate with same severity found
                app.Logger.Info("Duplicate alarm found, suppressing ...")
                if m.PerceivedSeverity == a.activeAlarms[idx].PerceivedSeverity {
                        // Duplicate with same severity found
+                       a.mutex.Unlock()
                        return nil, nil
                } else {
                        // Remove duplicate with different severity
                        return nil, nil
                } else {
                        // Remove duplicate with different severity
@@ -93,27 +96,42 @@ func (a *AlarmManager) ProcessAlarm(m *alarm.AlarmMessage) (*alert.PostAlertsOK,
                }
        }
 
                }
        }
 
-
        // Clear alarm if found from active alarm list
        if m.AlarmAction == alarm.AlarmActionClear {
                if found {
                        a.alarmHistory = append(a.alarmHistory, *m)
                        a.activeAlarms = a.RemoveAlarm(a.activeAlarms, idx, "active")
        // Clear alarm if found from active alarm list
        if m.AlarmAction == alarm.AlarmActionClear {
                if found {
                        a.alarmHistory = append(a.alarmHistory, *m)
                        a.activeAlarms = a.RemoveAlarm(a.activeAlarms, idx, "active")
-
+                       if ((len(a.alarmHistory) >= a.maxAlarmHistory) && (a.exceededAlarmHistoryOn == false)){
+                               app.Logger.Error("alarm history count exceeded maxAlarmHistory threshold")
+                               histAlarm := a.alarmClient.NewAlarm(alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD, alarm.SeverityWarning, "threshold", "history")
+                               histAlarmMessage := alarm.AlarmMessage{Alarm: histAlarm, AlarmAction: alarm.AlarmActionRaise, AlarmTime: (time.Now().UnixNano())}
+                               a.activeAlarms = append(a.activeAlarms, histAlarmMessage)
+                               a.alarmHistory = append(a.alarmHistory, histAlarmMessage)
+                       }
+                       if ((a.exceededActiveAlarmOn == true) && (m.Alarm.SpecificProblem == alarm.ACTIVE_ALARM_EXCEED_MAX_THRESHOLD)) {
+                               a.exceededActiveAlarmOn = false
+                       }
+                       if ((a.exceededAlarmHistoryOn == true) && (m.Alarm.SpecificProblem == alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD)) {
+                               a.exceededAlarmHistoryOn = false
+                       }
                        if a.postClear {
                        if a.postClear {
+                               a.mutex.Unlock()
                                return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusResolved, m.AlarmTime))
                        }
                }
                app.Logger.Info("No matching active alarm found, suppressing ...")
                                return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusResolved, m.AlarmTime))
                        }
                }
                app.Logger.Info("No matching active alarm found, suppressing ...")
+               a.mutex.Unlock()
                return nil, nil
        }
 
        // New alarm -> update active alarms and post to Alert Manager
        if m.AlarmAction == alarm.AlarmActionRaise {
                a.UpdateAlarmLists(m)
                return nil, nil
        }
 
        // New alarm -> update active alarms and post to Alert Manager
        if m.AlarmAction == alarm.AlarmActionRaise {
                a.UpdateAlarmLists(m)
+               a.mutex.Unlock()
                return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive, m.AlarmTime))
        }
 
                return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive, m.AlarmTime))
        }
 
+       a.mutex.Unlock()
        return nil, nil
 }
 
        return nil, nil
 }
 
@@ -128,34 +146,30 @@ func (a *AlarmManager) IsMatchFound(newAlarm alarm.Alarm) (int, bool) {
 }
 
 func (a *AlarmManager) RemoveAlarm(alarms []alarm.AlarmMessage, i int, listName string) []alarm.AlarmMessage {
 }
 
 func (a *AlarmManager) RemoveAlarm(alarms []alarm.AlarmMessage, i int, listName string) []alarm.AlarmMessage {
-       a.mutex.Lock()
-       defer a.mutex.Unlock()
-
        app.Logger.Info("Alarm '%+v' deleted from the '%s' list", alarms[i], listName)
        copy(alarms[i:], alarms[i+1:])
        return alarms[:len(alarms)-1]
 }
 
 func (a *AlarmManager) UpdateAlarmLists(newAlarm *alarm.AlarmMessage) {
        app.Logger.Info("Alarm '%+v' deleted from the '%s' list", alarms[i], listName)
        copy(alarms[i:], alarms[i+1:])
        return alarms[:len(alarms)-1]
 }
 
 func (a *AlarmManager) UpdateAlarmLists(newAlarm *alarm.AlarmMessage) {
-       a.mutex.Lock()
-       defer a.mutex.Unlock()
-
        /* If maximum number of active alarms is reached, an error log writing is made, and new alarm indicating the problem is raised.
           The attempt to raise the alarm next time will be suppressed when found as duplicate. */
        /* If maximum number of active alarms is reached, an error log writing is made, and new alarm indicating the problem is raised.
           The attempt to raise the alarm next time will be suppressed when found as duplicate. */
-       if len(a.activeAlarms) >= a.maxActiveAlarms {
+       if ((len(a.activeAlarms) >= a.maxActiveAlarms) && (a.exceededActiveAlarmOn == false)) {
                app.Logger.Error("active alarm count exceeded maxActiveAlarms threshold")
                app.Logger.Error("active alarm count exceeded maxActiveAlarms threshold")
-               actAlarm := a.alarmClient.NewAlarm(alarm.ACTIVE_ALARM_EXCEED_MAX_THRESHOLD, alarm.SeverityWarning, "clear alarms or raise threshold", "active alarms full")
+               actAlarm := a.alarmClient.NewAlarm(alarm.ACTIVE_ALARM_EXCEED_MAX_THRESHOLD, alarm.SeverityWarning, "threshold", "active")
                actAlarmMessage := alarm.AlarmMessage{Alarm: actAlarm, AlarmAction: alarm.AlarmActionRaise, AlarmTime: (time.Now().UnixNano())}
                a.activeAlarms = append(a.activeAlarms, actAlarmMessage)
                a.alarmHistory = append(a.alarmHistory, actAlarmMessage)
                actAlarmMessage := alarm.AlarmMessage{Alarm: actAlarm, AlarmAction: alarm.AlarmActionRaise, AlarmTime: (time.Now().UnixNano())}
                a.activeAlarms = append(a.activeAlarms, actAlarmMessage)
                a.alarmHistory = append(a.alarmHistory, actAlarmMessage)
+               a.exceededActiveAlarmOn = true
        }
 
        }
 
-       if len(a.alarmHistory) >= a.maxAlarmHistory {
+       if ((len(a.alarmHistory) >= a.maxAlarmHistory) && (a.exceededAlarmHistoryOn == false)) {
                app.Logger.Error("alarm history count exceeded maxAlarmHistory threshold")
                app.Logger.Error("alarm history count exceeded maxAlarmHistory threshold")
-               histAlarm := a.alarmClient.NewAlarm(alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD, alarm.SeverityWarning, "clear alarms or raise threshold", "alarm history full")
+               histAlarm := a.alarmClient.NewAlarm(alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD, alarm.SeverityWarning, "threshold", "history")
                histAlarmMessage := alarm.AlarmMessage{Alarm: histAlarm, AlarmAction: alarm.AlarmActionRaise, AlarmTime: (time.Now().UnixNano())}
                a.activeAlarms = append(a.activeAlarms, histAlarmMessage)
                a.alarmHistory = append(a.alarmHistory, histAlarmMessage)
                histAlarmMessage := alarm.AlarmMessage{Alarm: histAlarm, AlarmAction: alarm.AlarmActionRaise, AlarmTime: (time.Now().UnixNano())}
                a.activeAlarms = append(a.activeAlarms, histAlarmMessage)
                a.alarmHistory = append(a.alarmHistory, histAlarmMessage)
+               a.exceededAlarmHistoryOn = true
        }
 
        // @todo: For now just keep the alarms (both active and history) in-memory. Use SDL later for persistence
        }
 
        // @todo: For now just keep the alarms (both active and history) in-memory. Use SDL later for persistence
@@ -219,25 +233,66 @@ func (a *AlarmManager) ConfigChangeCB(configparam string) {
 
        a.maxActiveAlarms = app.Config.GetInt("controls.maxActiveAlarms")
        a.maxAlarmHistory = app.Config.GetInt("controls.maxAlarmHistory")
 
        a.maxActiveAlarms = app.Config.GetInt("controls.maxActiveAlarms")
        a.maxAlarmHistory = app.Config.GetInt("controls.maxAlarmHistory")
+       a.alertInterval = viper.GetInt("controls.promAlertManager.alertInterval")
+       a.amHost = viper.GetString("controls.promAlertManager.address")
 
        app.Logger.Debug("ConfigChangeCB: maxActiveAlarms %v", a.maxActiveAlarms)
        app.Logger.Debug("ConfigChangeCB: maxAlarmHistory = %v", a.maxAlarmHistory)
 
        app.Logger.Debug("ConfigChangeCB: maxActiveAlarms %v", a.maxActiveAlarms)
        app.Logger.Debug("ConfigChangeCB: maxAlarmHistory = %v", a.maxAlarmHistory)
+       app.Logger.Debug("ConfigChangeCB: alertInterval %v", a.alertInterval)
+       app.Logger.Debug("ConfigChangeCB: amHost = %v", a.amHost)
 
        return
 }
 
 
        return
 }
 
+func (a *AlarmManager) ReadAlarmDefinitionFromJson() { // ReadAlarmDefinitionFromJson loads alarm definitions from the JSON file named by the DEF_FILE environment variable into alarm.RICAlarmDefinitions.
+       // Read and parse failures are only logged; the method has no return value, so callers cannot detect them.
+       filename := os.Getenv("DEF_FILE") // os.Getenv yields "" when DEF_FILE is unset; the ReadFile below then fails and the error is logged
+       file, err := ioutil.ReadFile(filename)
+       if err == nil {
+               data := RicAlarmDefinitions{}
+               err = json.Unmarshal([]byte(file), &data) // file is already a []byte (ioutil.ReadFile's return type), so the conversion changes nothing
+               if err == nil {
+                       for _, alarmDefinition := range data.AlarmDefinitions {
+                               _, exists := alarm.RICAlarmDefinitions[alarmDefinition.AlarmId]
+                               if exists { // an already-registered AlarmId is never overwritten: log an error and skip this entry
+                                       app.Logger.Error("ReadAlarmDefinitionFromJson: alarm definition already exists for %v", alarmDefinition.AlarmId)
+                               } else {
+                                       app.Logger.Debug("ReadAlarmDefinitionFromJson: alarm  %v", alarmDefinition.AlarmId)
+                                       ricAlarmDefintion := new(alarm.AlarmDefinition) // copy the four parsed fields into a fresh alarm.AlarmDefinition
+                                       ricAlarmDefintion.AlarmId = alarmDefinition.AlarmId
+                                       ricAlarmDefintion.AlarmText = alarmDefinition.AlarmText
+                                       ricAlarmDefintion.EventType = alarmDefinition.EventType
+                                       ricAlarmDefintion.OperationInstructions = alarmDefinition.OperationInstructions
+                                       alarm.RICAlarmDefinitions[alarmDefinition.AlarmId] = ricAlarmDefintion // NOTE(review): package-level map is written without taking a lock in this function — confirm callers serialize access
+                               }
+                       }
+               } else { // the file was read but is not valid JSON for RicAlarmDefinitions: nothing is registered
+                       app.Logger.Error("ReadAlarmDefinitionFromJson: json.Unmarshal failed with error %v", err)
+               }
+       } else { // the file could not be read: nothing is registered
+               app.Logger.Error("ReadAlarmDefinitionFromJson: ioutil.ReadFile failed with error %v", err)
+       }
+}
+
 func (a *AlarmManager) Run(sdlcheck bool) {
        app.Logger.SetMdc("alarmManager", fmt.Sprintf("%s:%s", Version, Hash))
        app.SetReadyCB(func(d interface{}) { a.rmrReady = true }, true)
        app.Resource.InjectStatusCb(a.StatusCB)
        app.AddConfigChangeListener(a.ConfigChangeCB)
 
 func (a *AlarmManager) Run(sdlcheck bool) {
        app.Logger.SetMdc("alarmManager", fmt.Sprintf("%s:%s", Version, Hash))
        app.SetReadyCB(func(d interface{}) { a.rmrReady = true }, true)
        app.Resource.InjectStatusCb(a.StatusCB)
        app.AddConfigChangeListener(a.ConfigChangeCB)
 
+       alarm.RICAlarmDefinitions = make(map[int]*alarm.AlarmDefinition)
+       a.ReadAlarmDefinitionFromJson()
+
        app.Resource.InjectRoute("/ric/v1/alarms", a.RaiseAlarm, "POST")
        app.Resource.InjectRoute("/ric/v1/alarms", a.ClearAlarm, "DELETE")
        app.Resource.InjectRoute("/ric/v1/alarms/active", a.GetActiveAlarms, "GET")
        app.Resource.InjectRoute("/ric/v1/alarms/history", a.GetAlarmHistory, "GET")
        app.Resource.InjectRoute("/ric/v1/alarms/config", a.SetAlarmConfig, "POST")
        app.Resource.InjectRoute("/ric/v1/alarms/config", a.GetAlarmConfig, "GET")
        app.Resource.InjectRoute("/ric/v1/alarms", a.RaiseAlarm, "POST")
        app.Resource.InjectRoute("/ric/v1/alarms", a.ClearAlarm, "DELETE")
        app.Resource.InjectRoute("/ric/v1/alarms/active", a.GetActiveAlarms, "GET")
        app.Resource.InjectRoute("/ric/v1/alarms/history", a.GetAlarmHistory, "GET")
        app.Resource.InjectRoute("/ric/v1/alarms/config", a.SetAlarmConfig, "POST")
        app.Resource.InjectRoute("/ric/v1/alarms/config", a.GetAlarmConfig, "GET")
+       app.Resource.InjectRoute("/ric/v1/alarms/define", a.SetAlarmDefinition, "POST")
+       app.Resource.InjectRoute("/ric/v1/alarms/define/{alarmId}", a.DeleteAlarmDefinition, "DELETE")
+       app.Resource.InjectRoute("/ric/v1/alarms/define", a.GetAlarmDefinition, "GET")
+       app.Resource.InjectRoute("/ric/v1/alarms/define/{alarmId}", a.GetAlarmDefinition, "GET")
 
        // Start background timer for re-raising alerts
        a.postClear = sdlcheck
 
        // Start background timer for re-raising alerts
        a.postClear = sdlcheck
@@ -266,6 +321,8 @@ func NewAlarmManager(amHost string, alertInterval int) *AlarmManager {
                alarmHistory:    make([]alarm.AlarmMessage, 0),
                maxActiveAlarms: app.Config.GetInt("controls.maxActiveAlarms"),
                maxAlarmHistory: app.Config.GetInt("controls.maxAlarmHistory"),
                alarmHistory:    make([]alarm.AlarmMessage, 0),
                maxActiveAlarms: app.Config.GetInt("controls.maxActiveAlarms"),
                maxAlarmHistory: app.Config.GetInt("controls.maxAlarmHistory"),
+               exceededActiveAlarmOn:  false,
+               exceededAlarmHistoryOn: false,
        }
 }
 
        }
 }