From f5a8e71bbed7781b7b436a1838139bdcfd4a4b26 Mon Sep 17 00:00:00 2001 From: Mohamed Abukar Date: Mon, 19 Oct 2020 16:58:17 +0300 Subject: [PATCH] Additional alarm information Change-Id: I100b3898742c8811ad1218d7c4d4771a1ca1cfac Signed-off-by: Mohamed Abukar --- alarm/alarm.go | 20 +++++++------- alarm/types.go | 6 ++-- definitions/alarm-definition.json | 12 ++++---- go.mod | 2 ++ manager/cmd/manager.go | 58 +++++++++++++++++++++++---------------- manager/cmd/manager_test.go | 6 ++-- manager/cmd/restapi.go | 2 +- manager/cmd/types.go | 32 ++++++++++++--------- 8 files changed, 79 insertions(+), 59 deletions(-) diff --git a/alarm/alarm.go b/alarm/alarm.go index 3856fce..f8d909a 100755 --- a/alarm/alarm.go +++ b/alarm/alarm.go @@ -56,10 +56,10 @@ func InitAlarm(mo, id string) (*RICAlarm, error) { } if os.Getenv("ALARM_IF_RMR") == "" { - go InitRMR(r, "") - } else { - go InitRMR(r, ALARM_MANAGER_RMR_URL) - } + go InitRMR(r, "") + } else { + go InitRMR(r, ALARM_MANAGER_RMR_URL) + } return r, nil } @@ -180,15 +180,15 @@ func (r *RICAlarm) ReceiveMessage(cb func(AlarmMessage)) error { return errors.New("rmrRcv failed!") } -func InitRMR(r *RICAlarm, endpoint string) error { +func InitRMR(r *RICAlarm, endpoint string) error { // Setup static RT for alarm system if endpoint == "" { if r.moId == "my-pod" { - endpoint = "127.0.0.1:4560" - } else if r.moId == "my-pod-lib" { - endpoint = "127.0.0.1:4588" - } - } + endpoint = "127.0.0.1:4560" + } else if r.moId == "my-pod-lib" { + endpoint = "127.0.0.1:4588" + } + } alarmRT := fmt.Sprintf("newrt|start\nrte|13111|%s\nnewrt|end\n", endpoint) alarmRTFile := "/tmp/alarm.rt" diff --git a/alarm/types.go b/alarm/types.go index be96fec..2aaa84b 100755 --- a/alarm/types.go +++ b/alarm/types.go @@ -148,9 +148,9 @@ const ( ) type AlarmDefinition struct { - AlarmId int `json:"alarmid"` - AlarmText string `json:"alarmtext"` - EventType string `json:"eventtype"` + AlarmId int `json:"alarmId"` + AlarmText string `json:"alarmText"` + EventType string `json:"eventType"` OperationInstructions string `json:"operationinstructions"` } diff --git a/definitions/alarm-definition.json b/definitions/alarm-definition.json index 96d255f..eebf842 100755 --- a/definitions/alarm-definition.json +++ b/definitions/alarm-definition.json @@ -3,37 +3,37 @@ { "alarmId" : 8004, "alarmText" : "RIC ROUTING TABLE DISTRIBUTION FAILED", - "eventType" : "Processing error", + "eventType" : "processingError", "operationInstructions" : "Not defined" }, { "alarmId" : 8005, "alarmText" : "TCP CONNECTIVITY LOST TO DBAAS", - "eventType" : "Communication error", + "eventType" : "communication", "operationInstructions" : "Not defined" }, { "alarmId" : 8006, "alarmText" : "E2 CONNECTIVITY LOST TO G-NODEB", - "eventType" : "Communication error", + "eventType" : "communication", "operationInstructions" : "Not defined" }, { "alarmId" : 8007, "alarmText" : "E2 CONNECTIVITY LOST TO E-NODEB", - "eventType" : "Communication error", + "eventType" : "communication", "operationInstructions" : "Not defined" }, { "alarmId" : 8008, "alarmText" : "ACTIVE ALARM EXCEED MAX THRESHOLD", - "eventType" : "storage warning", + "eventType" : "equipment", "operationInstructions" : "Clear alarms or raise threshold" }, { "alarmId" : 8009, "alarmText" : "ALARM HISTORY EXCEED MAX THRESHOLD", - "eventType" : "storage warning", + "eventType" : "equipment", "operationInstructions" : "Clear alarms or raise threshold" } ] diff --git a/go.mod b/go.mod index b6ae937..0fb43fa 100644 --- a/go.mod +++ b/go.mod @@ -20,6 +20,8 @@ require ( github.com/gorilla/mux v1.7.1 github.com/jedib0t/go-pretty v4.3.0+incompatible github.com/mattn/go-runewidth v0.0.9 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.1 // indirect github.com/prometheus/alertmanager v0.20.0 github.com/spf13/viper v1.6.2 github.com/stretchr/testify v1.5.1 diff --git a/manager/cmd/manager.go b/manager/cmd/manager.go index f00a812..b7f9ce2 100755 --- a/manager/cmd/manager.go +++ b/manager/cmd/manager.go @@ -71,10 +71,10 @@ func (a *AlarmManager) HandleAlarms(rp *app.RMRParams) (*alert.PostAlertsOK, err } app.Logger.Info("newAlarm: %v", m) - return a.ProcessAlarm(&m) + return a.ProcessAlarm(&AlarmInformation{m, alarm.AlarmDefinition{}}) } -func (a *AlarmManager) ProcessAlarm(m *alarm.AlarmMessage) (*alert.PostAlertsOK, error) { +func (a *AlarmManager) ProcessAlarm(m *AlarmInformation) (*alert.PostAlertsOK, error) { a.mutex.Lock() if _, ok := alarm.RICAlarmDefinitions[m.Alarm.SpecificProblem]; !ok { app.Logger.Warn("Alarm (SP='%d') not recognized, suppressing ...", m.Alarm.SpecificProblem) @@ -101,17 +101,18 @@ func (a *AlarmManager) ProcessAlarm(m *alarm.AlarmMessage) (*alert.PostAlertsOK, if found { a.alarmHistory = append(a.alarmHistory, *m) a.activeAlarms = a.RemoveAlarm(a.activeAlarms, idx, "active") - if ((len(a.alarmHistory) >= a.maxAlarmHistory) && (a.exceededAlarmHistoryOn == false)){ + if (len(a.alarmHistory) >= a.maxAlarmHistory) && (a.exceededAlarmHistoryOn == false) { app.Logger.Error("alarm history count exceeded maxAlarmHistory threshold") histAlarm := a.alarmClient.NewAlarm(alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD, alarm.SeverityWarning, "threshold", "history") - histAlarmMessage := alarm.AlarmMessage{Alarm: histAlarm, AlarmAction: alarm.AlarmActionRaise, AlarmTime: (time.Now().UnixNano())} + am := alarm.AlarmMessage{Alarm: histAlarm, AlarmAction: alarm.AlarmActionRaise, AlarmTime: (time.Now().UnixNano())} + histAlarmMessage := AlarmInformation{am, alarm.AlarmDefinition{}} a.activeAlarms = append(a.activeAlarms, histAlarmMessage) a.alarmHistory = append(a.alarmHistory, histAlarmMessage) } - if ((a.exceededActiveAlarmOn == true) && (m.Alarm.SpecificProblem == alarm.ACTIVE_ALARM_EXCEED_MAX_THRESHOLD)) { + if (a.exceededActiveAlarmOn == true) && (m.Alarm.SpecificProblem == alarm.ACTIVE_ALARM_EXCEED_MAX_THRESHOLD) { a.exceededActiveAlarmOn = false } - if ((a.exceededAlarmHistoryOn == true) && (m.Alarm.SpecificProblem == alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD)) { + if (a.exceededAlarmHistoryOn == true) && (m.Alarm.SpecificProblem == alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD) { a.exceededAlarmHistoryOn = false } if a.postClear { @@ -145,33 +146,43 @@ func (a *AlarmManager) IsMatchFound(newAlarm alarm.Alarm) (int, bool) { return -1, false } -func (a *AlarmManager) RemoveAlarm(alarms []alarm.AlarmMessage, i int, listName string) []alarm.AlarmMessage { +func (a *AlarmManager) RemoveAlarm(alarms []AlarmInformation, i int, listName string) []AlarmInformation { app.Logger.Info("Alarm '%+v' deleted from the '%s' list", alarms[i], listName) copy(alarms[i:], alarms[i+1:]) return alarms[:len(alarms)-1] } -func (a *AlarmManager) UpdateAlarmLists(newAlarm *alarm.AlarmMessage) { +func (a *AlarmManager) UpdateAlarmFields(newAlarm *AlarmInformation) { + alarmDef := alarm.RICAlarmDefinitions[newAlarm.SpecificProblem] + newAlarm.AlarmId = a.uniqueAlarmId + a.uniqueAlarmId++ // @todo: generate a unique ID + newAlarm.AlarmText = alarmDef.AlarmText + newAlarm.EventType = alarmDef.EventType +} + +func (a *AlarmManager) UpdateAlarmLists(newAlarm *AlarmInformation) { /* If maximum number of active alarms is reached, an error log writing is made, and new alarm indicating the problem is raised. The attempt to raise the alarm next time will be supressed when found as duplicate. */ - if ((len(a.activeAlarms) >= a.maxActiveAlarms) && (a.exceededActiveAlarmOn == false)) { + if (len(a.activeAlarms) >= a.maxActiveAlarms) && (a.exceededActiveAlarmOn == false) { app.Logger.Error("active alarm count exceeded maxActiveAlarms threshold") actAlarm := a.alarmClient.NewAlarm(alarm.ACTIVE_ALARM_EXCEED_MAX_THRESHOLD, alarm.SeverityWarning, "threshold", "active") actAlarmMessage := alarm.AlarmMessage{Alarm: actAlarm, AlarmAction: alarm.AlarmActionRaise, AlarmTime: (time.Now().UnixNano())} - a.activeAlarms = append(a.activeAlarms, actAlarmMessage) - a.alarmHistory = append(a.alarmHistory, actAlarmMessage) + a.activeAlarms = append(a.activeAlarms, AlarmInformation{actAlarmMessage, alarm.AlarmDefinition{}}) + a.alarmHistory = append(a.alarmHistory, AlarmInformation{actAlarmMessage, alarm.AlarmDefinition{}}) a.exceededActiveAlarmOn = true } - if ((len(a.alarmHistory) >= a.maxAlarmHistory) && (a.exceededAlarmHistoryOn == false)) { + if (len(a.alarmHistory) >= a.maxAlarmHistory) && (a.exceededAlarmHistoryOn == false) { app.Logger.Error("alarm history count exceeded maxAlarmHistory threshold") histAlarm := a.alarmClient.NewAlarm(alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD, alarm.SeverityWarning, "threshold", "history") histAlarmMessage := alarm.AlarmMessage{Alarm: histAlarm, AlarmAction: alarm.AlarmActionRaise, AlarmTime: (time.Now().UnixNano())} - a.activeAlarms = append(a.activeAlarms, histAlarmMessage) - a.alarmHistory = append(a.alarmHistory, histAlarmMessage) + a.activeAlarms = append(a.activeAlarms, AlarmInformation{histAlarmMessage, alarm.AlarmDefinition{}}) + a.alarmHistory = append(a.alarmHistory, AlarmInformation{histAlarmMessage, alarm.AlarmDefinition{}}) a.exceededAlarmHistoryOn = true } + a.UpdateAlarmFields(newAlarm) + // @todo: For now just keep the alarms (both active and history) in-memory. Use SDL later for persistence a.activeAlarms = append(a.activeAlarms, *newAlarm) a.alarmHistory = append(a.alarmHistory, *newAlarm) @@ -312,15 +323,16 @@ func NewAlarmManager(amHost string, alertInterval int) *AlarmManager { } return &AlarmManager{ - rmrReady: false, - amHost: amHost, - amBaseUrl: viper.GetString("controls.promAlertManager.baseUrl"), - amSchemes: []string{viper.GetString("controls.promAlertManager.schemes")}, - alertInterval: alertInterval, - activeAlarms: make([]alarm.AlarmMessage, 0), - alarmHistory: make([]alarm.AlarmMessage, 0), - maxActiveAlarms: app.Config.GetInt("controls.maxActiveAlarms"), - maxAlarmHistory: app.Config.GetInt("controls.maxAlarmHistory"), + rmrReady: false, + amHost: amHost, + amBaseUrl: viper.GetString("controls.promAlertManager.baseUrl"), + amSchemes: []string{viper.GetString("controls.promAlertManager.schemes")}, + alertInterval: alertInterval, + activeAlarms: make([]AlarmInformation, 0), + alarmHistory: make([]AlarmInformation, 0), + uniqueAlarmId: 1, + maxActiveAlarms: app.Config.GetInt("controls.maxActiveAlarms"), + maxAlarmHistory: app.Config.GetInt("controls.maxAlarmHistory"), exceededActiveAlarmOn: false, exceededAlarmHistoryOn: false, } diff --git a/manager/cmd/manager_test.go b/manager/cmd/manager_test.go index a7274c2..8cae6c2 100755 --- a/manager/cmd/manager_test.go +++ b/manager/cmd/manager_test.go @@ -339,7 +339,7 @@ func TestGetPrometheusAlerts(t *testing.T) { commandReady := make(chan bool, 1) command := "cli/alarm-cli" - args := []string {"gapam", "--active", "true", "--inhibited", "true", "--silenced", "--unprocessed", "true", "true", "--host", "localhost", "--port", "9093", "flushall"} + args := []string{"gapam", "--active", "true", "--inhibited", "true", "--silenced", "--unprocessed", "true", "true", "--host", "localhost", "--port", "9093", "flushall"} ExecCLICommand(commandReady, command, args...) <-commandReady @@ -398,8 +398,8 @@ func CreatePromAlertSimulator2(t *testing.T, method, url string) *httptest.Serve w.WriteHeader(200) // Read alerts from file payload, err := readJSONFromFile("../testresources/prometheus-alerts.json") - if err != nil { - t.Error("Failed to send response: ", err) + if err != nil { + t.Error("Failed to send response: ", err) } _, err = w.Write(payload) if err != nil { diff --git a/manager/cmd/restapi.go b/manager/cmd/restapi.go index 80f4997..33aac0d 100755 --- a/manager/cmd/restapi.go +++ b/manager/cmd/restapi.go @@ -180,7 +180,7 @@ func (a *AlarmManager) doAction(w http.ResponseWriter, r *http.Request, isRaiseA m.AlarmTime = time.Now().UnixNano() } - _, err := a.ProcessAlarm(&m) + _, err := a.ProcessAlarm(&AlarmInformation{m, alarm.AlarmDefinition{}}) return err } diff --git a/manager/cmd/types.go b/manager/cmd/types.go index c415fda..7af8ec6 100755 --- a/manager/cmd/types.go +++ b/manager/cmd/types.go @@ -27,22 +27,28 @@ import ( ) type AlarmManager struct { - amHost string - amBaseUrl string - amSchemes []string - alertInterval int - activeAlarms []alarm.AlarmMessage - alarmHistory []alarm.AlarmMessage - mutex sync.Mutex - rmrReady bool - postClear bool - maxActiveAlarms int - maxAlarmHistory int - alarmClient *alarm.RICAlarm - exceededActiveAlarmOn bool + amHost string + amBaseUrl string + amSchemes []string + alertInterval int + activeAlarms []AlarmInformation + alarmHistory []AlarmInformation + uniqueAlarmId int + mutex sync.Mutex + rmrReady bool + postClear bool + maxActiveAlarms int + maxAlarmHistory int + alarmClient *alarm.RICAlarm + exceededActiveAlarmOn bool exceededAlarmHistoryOn bool } +type AlarmInformation struct { + alarm.AlarmMessage + alarm.AlarmDefinition +} + type AlertStatus string const ( -- 2.16.6