Merge "alarm manager to manage predefined platform alarm definitions in a json file...
[ric-plt/alarm-go.git] / manager / cmd / manager.go
index f4b7b8e..6e67672 100755 (executable)
@@ -24,16 +24,16 @@ import (
        "encoding/json"
        "fmt"
        "time"
-
+        "os"
+       "gerrit.o-ran-sc.org/r/ric-plt/alarm-go/alarm"
+       app "gerrit.o-ran-sc.org/r/ric-plt/xapp-frame/pkg/xapp"
        clientruntime "github.com/go-openapi/runtime/client"
        "github.com/go-openapi/strfmt"
        "github.com/prometheus/alertmanager/api/v2/client"
        "github.com/prometheus/alertmanager/api/v2/client/alert"
        "github.com/prometheus/alertmanager/api/v2/models"
        "github.com/spf13/viper"
-
-       "gerrit.o-ran-sc.org/r/ric-plt/alarm-go/alarm"
-       app "gerrit.o-ran-sc.org/r/ric-plt/xapp-frame/pkg/xapp"
+       "io/ioutil"
 )
 
 func (a *AlarmManager) StartAlertTimer() {
@@ -42,7 +42,7 @@ func (a *AlarmManager) StartAlertTimer() {
                a.mutex.Lock()
                for _, m := range a.activeAlarms {
                        app.Logger.Info("Re-raising alarm: %v", m)
-                       a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive))
+                       a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive, m.AlarmTime))
                }
                a.mutex.Unlock()
        }
@@ -82,9 +82,15 @@ func (a *AlarmManager) ProcessAlarm(m *alarm.AlarmMessage) (*alert.PostAlertsOK,
 
        // Suppress duplicate alarms
        idx, found := a.IsMatchFound(m.Alarm)
-       if found && m.AlarmAction != alarm.AlarmActionClear {
+       if found && m.AlarmAction == alarm.AlarmActionRaise {
                app.Logger.Info("Duplicate alarm found, suppressing ...")
-               return nil, nil
+               if m.PerceivedSeverity == a.activeAlarms[idx].PerceivedSeverity {
+                       // Duplicate with same severity found
+                       return nil, nil
+               } else {
+                       // Remove duplicate with different severity
+                       a.activeAlarms = a.RemoveAlarm(a.activeAlarms, idx, "active")
+               }
        }
 
        // Clear alarm if found from active alarm list
@@ -94,7 +100,7 @@ func (a *AlarmManager) ProcessAlarm(m *alarm.AlarmMessage) (*alert.PostAlertsOK,
                        a.activeAlarms = a.RemoveAlarm(a.activeAlarms, idx, "active")
 
                        if a.postClear {
-                               return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusResolved))
+                               return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusResolved, m.AlarmTime))
                        }
                }
                app.Logger.Info("No matching active alarm found, suppressing ...")
@@ -104,7 +110,7 @@ func (a *AlarmManager) ProcessAlarm(m *alarm.AlarmMessage) (*alert.PostAlertsOK,
        // New alarm -> update active alarms and post to Alert Manager
        if m.AlarmAction == alarm.AlarmActionRaise {
                a.UpdateAlarmLists(m)
-               return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive))
+               return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive, m.AlarmTime))
        }
 
        return nil, nil
@@ -133,13 +139,22 @@ func (a *AlarmManager) UpdateAlarmLists(newAlarm *alarm.AlarmMessage) {
        a.mutex.Lock()
        defer a.mutex.Unlock()
 
-       // If maximum number of active alarms is reached, purge the oldest alarm
-       if len(a.activeAlarms) >= viper.GetInt("controls.maxActiveAlarms") {
-               a.activeAlarms = a.RemoveAlarm(a.activeAlarms, 0, "active")
+       /* If the maximum number of active alarms is reached, an error is logged and a new alarm indicating the problem is raised.
+          The next attempt to raise that alarm will be suppressed when it is found as a duplicate. */
+       if len(a.activeAlarms) >= a.maxActiveAlarms {
+               app.Logger.Error("active alarm count exceeded maxActiveAlarms threshold")
+               actAlarm := a.alarmClient.NewAlarm(alarm.ACTIVE_ALARM_EXCEED_MAX_THRESHOLD, alarm.SeverityWarning, "clear alarms or raise threshold", "active alarms full")
+               actAlarmMessage := alarm.AlarmMessage{Alarm: actAlarm, AlarmAction: alarm.AlarmActionRaise, AlarmTime: (time.Now().UnixNano())}
+               a.activeAlarms = append(a.activeAlarms, actAlarmMessage)
+               a.alarmHistory = append(a.alarmHistory, actAlarmMessage)
        }
 
-       if len(a.alarmHistory) >= viper.GetInt("controls.maxAlarmHistory") {
-               a.alarmHistory = a.RemoveAlarm(a.alarmHistory, 0, "history")
+       if len(a.alarmHistory) >= a.maxAlarmHistory {
+               app.Logger.Error("alarm history count exceeded maxAlarmHistory threshold")
+               histAlarm := a.alarmClient.NewAlarm(alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD, alarm.SeverityWarning, "clear alarms or raise threshold", "alarm history full")
+               histAlarmMessage := alarm.AlarmMessage{Alarm: histAlarm, AlarmAction: alarm.AlarmActionRaise, AlarmTime: (time.Now().UnixNano())}
+               a.activeAlarms = append(a.activeAlarms, histAlarmMessage)
+               a.alarmHistory = append(a.alarmHistory, histAlarmMessage)
        }
 
        // @todo: For now just keep the alarms (both active and history) in-memory. Use SDL later for persistence
@@ -147,7 +162,7 @@ func (a *AlarmManager) UpdateAlarmLists(newAlarm *alarm.AlarmMessage) {
        a.alarmHistory = append(a.alarmHistory, *newAlarm)
 }
 
-func (a *AlarmManager) GenerateAlertLabels(newAlarm alarm.Alarm, status AlertStatus) (models.LabelSet, models.LabelSet) {
+func (a *AlarmManager) GenerateAlertLabels(newAlarm alarm.Alarm, status AlertStatus, alarmTime int64) (models.LabelSet, models.LabelSet) {
        alarmDef := alarm.RICAlarmDefinitions[newAlarm.SpecificProblem]
        amLabels := models.LabelSet{
                "status":      string(status),
@@ -162,6 +177,7 @@ func (a *AlarmManager) GenerateAlertLabels(newAlarm alarm.Alarm, status AlertSta
                "additional_info": newAlarm.AdditionalInfo,
                "summary":         alarmDef.EventType,
                "instructions":    alarmDef.OperationInstructions,
+               "timestamp":       fmt.Sprintf("%s", time.Unix(0, alarmTime).Format("02/01/2006, 15:04:05")),
        }
 
        return amLabels, amAnnotations
@@ -198,19 +214,71 @@ func (a *AlarmManager) StatusCB() bool {
        return a.rmrReady
 }
 
+func (a *AlarmManager) ConfigChangeCB(configparam string) {
        // ConfigChangeCB refreshes the cached alarm limits from the application
        // configuration. It is registered as a config-change listener in Run.
        // configparam is not read here; both limits are re-read on every call.
+
+       a.maxActiveAlarms = app.Config.GetInt("controls.maxActiveAlarms")
+       a.maxAlarmHistory = app.Config.GetInt("controls.maxAlarmHistory")
+
        // Log the values that are now in effect.
+       app.Logger.Debug("ConfigChangeCB: maxActiveAlarms %v", a.maxActiveAlarms)
+       app.Logger.Debug("ConfigChangeCB: maxAlarmHistory = %v", a.maxAlarmHistory)
+
+       return
+}
+
+func (a *AlarmManager) ReadAlarmDefinitionFromJson() {
        // ReadAlarmDefinitionFromJson loads alarm definitions from the JSON file
        // named by the DEF_FILE environment variable and adds them to the
        // package-level alarm.RICAlarmDefinitions map, keyed by AlarmId.
        // Failures are only logged; nothing is returned to the caller.
+
        // NOTE(review): if DEF_FILE is unset, os.Getenv yields "" and the read
        // below fails, so the only effect is the "ioutil.ReadFile failed" log line.
+       filename := os.Getenv("DEF_FILE")
+       file, err := ioutil.ReadFile(filename)
+       if err == nil {
+               data := RicAlarmDefinitions{}
                // file is already a []byte; the conversion is a no-op.
+               err = json.Unmarshal([]byte(file), &data)
+               if err == nil {
+                       for _, alarmDefinition := range data.AlarmDefinitions {
                                // An id that is already in the map is kept as-is; the
                                // entry from the file is reported and skipped.
+                               _, exists := alarm.RICAlarmDefinitions[alarmDefinition.AlarmId]
+                               if exists {
+                                       app.Logger.Error("ReadAlarmDefinitionFromJson: alarm definition already exists for %v", alarmDefinition.AlarmId)
+                               } else {
+                                       app.Logger.Debug("ReadAlarmDefinitionFromJson: alarm  %v", alarmDefinition.AlarmId)
                                        // Copy the four fields into a freshly allocated
                                        // alarm.AlarmDefinition and store the pointer.
+                                       ricAlarmDefintion := new(alarm.AlarmDefinition)
+                                       ricAlarmDefintion.AlarmId = alarmDefinition.AlarmId
+                                       ricAlarmDefintion.AlarmText = alarmDefinition.AlarmText
+                                       ricAlarmDefintion.EventType = alarmDefinition.EventType
+                                       ricAlarmDefintion.OperationInstructions = alarmDefinition.OperationInstructions
+                                       alarm.RICAlarmDefinitions[alarmDefinition.AlarmId] = ricAlarmDefintion
+                               }
+                       }
+               } else {
+                       app.Logger.Error("json.Unmarshal failed with error %v", err)
+               }
+       } else {
+               app.Logger.Error("ioutil.ReadFile failed with error %v", err)
+       }
+}
+
 func (a *AlarmManager) Run(sdlcheck bool) {
        // Run wires the alarm manager into the xapp framework (logging context,
        // readiness/status callbacks, config listener, REST routes), loads the
        // alarm definitions, starts the alert re-raise timer and finally hands
        // control to app.RunWithParams. sdlcheck is stored in a.postClear and
        // also passed through to app.RunWithParams.
        app.Logger.SetMdc("alarmManager", fmt.Sprintf("%s:%s", Version, Hash))
        app.SetReadyCB(func(d interface{}) { a.rmrReady = true }, true)
        app.Resource.InjectStatusCb(a.StatusCB)
+       app.AddConfigChangeListener(a.ConfigChangeCB)
+
        // Replace the package-level definitions map with an empty one, then
        // fill it from the JSON definition file.
+       alarm.RICAlarmDefinitions = make(map[int]*alarm.AlarmDefinition)
+       a.ReadAlarmDefinitionFromJson()
 
        app.Resource.InjectRoute("/ric/v1/alarms", a.RaiseAlarm, "POST")
        app.Resource.InjectRoute("/ric/v1/alarms", a.ClearAlarm, "DELETE")
        app.Resource.InjectRoute("/ric/v1/alarms/active", a.GetActiveAlarms, "GET")
        app.Resource.InjectRoute("/ric/v1/alarms/history", a.GetAlarmHistory, "GET")
+       app.Resource.InjectRoute("/ric/v1/alarms/config", a.SetAlarmConfig, "POST")
+       app.Resource.InjectRoute("/ric/v1/alarms/config", a.GetAlarmConfig, "GET")
+       app.Resource.InjectRoute("/ric/v1/alarms/define", a.SetAlarmDefinition, "POST")
+       app.Resource.InjectRoute("/ric/v1/alarms/define/{alarmId}", a.DeleteAlarmDefinition, "DELETE")
        // GetAlarmDefinition is registered for both the collection path and
        // the single-id path.
+       app.Resource.InjectRoute("/ric/v1/alarms/define", a.GetAlarmDefinition, "GET")
+       app.Resource.InjectRoute("/ric/v1/alarms/define/{alarmId}", a.GetAlarmDefinition, "GET")
 
        // Start background timer for re-raising alerts
        a.postClear = sdlcheck
        go a.StartAlertTimer()
        // NOTE(review): the error returned by alarm.InitAlarm is discarded, and
        // a.alarmClient is assigned only after the timer goroutine has been
        // started — confirm nothing dereferences alarmClient before this line.
+       a.alarmClient, _ = alarm.InitAlarm("SEP", "ALARMMANAGER")
 
        app.RunWithParams(a, sdlcheck)
 }
@@ -225,13 +293,15 @@ func NewAlarmManager(amHost string, alertInterval int) *AlarmManager {
        }
 
        return &AlarmManager{
-               rmrReady:      false,
-               amHost:        amHost,
-               amBaseUrl:     viper.GetString("controls.promAlertManager.baseUrl"),
-               amSchemes:     []string{viper.GetString("controls.promAlertManager.schemes")},
-               alertInterval: alertInterval,
-               activeAlarms:  make([]alarm.AlarmMessage, 0),
-               alarmHistory:  make([]alarm.AlarmMessage, 0),
+               rmrReady:        false,
+               amHost:          amHost,
+               amBaseUrl:       viper.GetString("controls.promAlertManager.baseUrl"),
+               amSchemes:       []string{viper.GetString("controls.promAlertManager.schemes")},
+               alertInterval:   alertInterval,
+               activeAlarms:    make([]alarm.AlarmMessage, 0),
+               alarmHistory:    make([]alarm.AlarmMessage, 0),
+               maxActiveAlarms: app.Config.GetInt("controls.maxActiveAlarms"),
+               maxAlarmHistory: app.Config.GetInt("controls.maxAlarmHistory"),
        }
 }