LN0739_FM_FR8: relaxing the active alarm and alarm history restrictions 67/4767/5
author vipin <vipin.mavila@nokia.com>
Tue, 22 Sep 2020 12:04:59 +0000 (12:04 +0000)
committer vipin <vipin.mavila@nokia.com>
Thu, 24 Sep 2020 09:56:06 +0000 (09:56 +0000)
- New alarms can still be added when the max active alarm threshold or the
  alarm history threshold is reached. In such situations the alarm manager
  raises an additional alarm reporting that the threshold was exceeded.
- Review comments closed.
- Review comments closed.

Change-Id: I885418dcc19c587d1139f8251eda735b4a2bba00
Signed-off-by: vipin <vipin.mavila@nokia.com>
alarm/types.go
build/build_ubuntu.sh
cli/alarm-cli.go
go.mod
manager/cmd/manager.go
manager/cmd/manager_test.go
manager/cmd/restapi.go
manager/cmd/types.go
schemas/alarm-schema.json

index dc77d43..149671a 100755 (executable)
@@ -67,6 +67,11 @@ type AlarmMessage struct {
        AlarmTime int64
 }
 
+type AlarmConfigParams struct {
+        MaxActiveAlarms int `json:"maxactivealarms"`
+        MaxAlarmHistory int `json:"maxalarmhistory"`
+}
+
 // RICAlarm is an alarm instance
 type RICAlarm struct {
        moId       string
@@ -88,6 +93,8 @@ const (
        TCP_CONNECTIVITY_LOST_TO_DBAAS int = 8005
        E2_CONNECTIVITY_LOST_TO_GNODEB int = 8006
        E2_CONNECTIVITY_LOST_TO_ENODEB int = 8007
+       ACTIVE_ALARM_EXCEED_MAX_THRESHOLD int = 8008
+       ALARM_HISTORY_EXCEED_MAX_THRESHOLD int = 8009
 )
 
 type AlarmDefinition struct {
@@ -122,6 +129,18 @@ var RICAlarmDefinitions = map[int]AlarmDefinition{
                EventType:             "Communication error",
                OperationInstructions: "Not defined",
        },
+       ACTIVE_ALARM_EXCEED_MAX_THRESHOLD: {
+               AlarmId:               ACTIVE_ALARM_EXCEED_MAX_THRESHOLD,
+               AlarmText:             "ACTIVE ALARM EXCEED MAX THRESHOLD",
+               EventType:             "Warning",
+               OperationInstructions: "Not defined",
+       },
+       ALARM_HISTORY_EXCEED_MAX_THRESHOLD: {
+               AlarmId:               ALARM_HISTORY_EXCEED_MAX_THRESHOLD,
+               AlarmText:             "ALARM HISTORY EXCEED MAX THRESHOLD",
+               EventType:             "Warning",
+               OperationInstructions: "Not defined",
+       },
 }
 
 const (
index 3e94016..29b9d77 100755 (executable)
@@ -22,11 +22,11 @@ set -eux
 echo "--> build_ubuntu.sh starts"
 
 # Install RMR from deb packages at packagecloud.io
-rmr=rmr_4.0.2_amd64.deb
+rmr=rmr_4.1.2_amd64.deb
 wget --content-disposition  https://packagecloud.io/o-ran-sc/release/packages/debian/stretch/$rmr/download.deb
 sudo dpkg -i $rmr
 rm $rmr
-rmrdev=rmr-dev_4.0.2_amd64.deb
+rmrdev=rmr-dev_4.1.2_amd64.deb
 wget --content-disposition https://packagecloud.io/o-ran-sc/release/packages/debian/stretch/$rmrdev/download.deb
 sudo dpkg -i $rmrdev
 rm $rmrdev
index 665f968..97ccb91 100755 (executable)
@@ -74,6 +74,18 @@ func main() {
                        postAlarm(flags, readAlarmParams(flags, true), alarm.AlarmActionClear)
                })
 
+       // Configure an alarm manager
+       commando.
+               Register("configure").
+               SetShortDescription("Configure alarm manager with given parameters").
+               AddFlag("mal", "max active alarms", commando.Int, nil).
+               AddFlag("mah", "max alarm history", commando.Int, nil).
+               AddFlag("host", "Alarm manager host address", commando.String, "localhost").
+               AddFlag("port", "Alarm manager host address", commando.String, "8080").
+               SetAction(func(args map[string]commando.ArgValue, flags map[string]commando.FlagValue) {
+                       postAlarmConfig(flags)
+               })
+
        // parse command-line arguments
        commando.Parse(nil)
 }
@@ -162,3 +174,24 @@ func displayAlarms(alarms []alarm.AlarmMessage, isHistory bool) {
        t.SetStyle(table.StyleColoredBright)
        t.Render()
 }
+
+func postAlarmConfig(flags map[string]commando.FlagValue) {
+       host, _ := flags["host"].GetString()
+       port, _ := flags["port"].GetString()
+       maxactivealarms, _ := flags["mal"].GetInt()
+       maxalarmhistory, _ := flags["mah"].GetInt()
+       targetUrl := fmt.Sprintf("http://%s:%s/ric/v1/alarms/config", host, port)
+
+       m := alarm.AlarmConfigParams{MaxActiveAlarms: maxactivealarms, MaxAlarmHistory: maxalarmhistory}
+       jsonData, err := json.Marshal(m)
+       if err != nil {
+               fmt.Println("json.Marshal failed: %v", err)
+               return
+       }
+
+       resp, err := http.Post(targetUrl, "application/json", bytes.NewBuffer(jsonData))
+       if err != nil || resp == nil {
+               fmt.Println("Couldn't fetch post alarm configuration due to error: %v", err)
+               return
+       }
+}
diff --git a/go.mod b/go.mod
index 7d78729..2827b3e 100644 (file)
--- a/go.mod
+++ b/go.mod
@@ -16,7 +16,8 @@ require (
        gerrit.o-ran-sc.org/r/ric-plt/xapp-frame v0.0.0-00010101000000-000000000000
        github.com/go-openapi/runtime v0.19.11
        github.com/go-openapi/strfmt v0.19.4
-       github.com/jedib0t/go-pretty v4.3.0+incompatible
+       github.com/gorilla/mux v1.7.1
+       github.com/jedib0t/go-pretty v4.3.0+incompatible // indirect
        github.com/mattn/go-runewidth v0.0.9 // indirect
        github.com/prometheus/alertmanager v0.20.0
        github.com/spf13/viper v1.6.2
index 3ca2d84..9a42187 100755 (executable)
@@ -133,13 +133,22 @@ func (a *AlarmManager) UpdateAlarmLists(newAlarm *alarm.AlarmMessage) {
        a.mutex.Lock()
        defer a.mutex.Unlock()
 
-       // If maximum number of active alarms is reached, purge the oldest alarm
-       if len(a.activeAlarms) >= viper.GetInt("controls.maxActiveAlarms") {
-               a.activeAlarms = a.RemoveAlarm(a.activeAlarms, 0, "active")
+       /* If the maximum number of active alarms is reached, an error is logged and a new alarm indicating the problem is raised.
+          Later attempts to raise that same alarm are suppressed when it is detected as a duplicate. */
+       if len(a.activeAlarms) >= a.maxActiveAlarms {
+               app.Logger.Error("active alarm count exceeded maxActiveAlarms threshold")
+               actAlarm := a.alarmClient.NewAlarm(alarm.ACTIVE_ALARM_EXCEED_MAX_THRESHOLD, alarm.SeverityWarning, "clear alarms or raise threshold", "active alarms full")
+               actAlarmMessage := alarm.AlarmMessage{Alarm: actAlarm, AlarmAction: alarm.AlarmActionRaise, AlarmTime: (time.Now().UnixNano())}
+               a.activeAlarms = append(a.activeAlarms, actAlarmMessage)
+               a.alarmHistory = append(a.alarmHistory, actAlarmMessage)
        }
 
-       if len(a.alarmHistory) >= viper.GetInt("controls.maxAlarmHistory") {
-               a.alarmHistory = a.RemoveAlarm(a.alarmHistory, 0, "history")
+       if len(a.alarmHistory) >= a.maxAlarmHistory {
+               app.Logger.Error("alarm history count exceeded maxAlarmHistory threshold")
+               histAlarm := a.alarmClient.NewAlarm(alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD, alarm.SeverityWarning, "clear alarms or raise threshold", "alarm history full")
+               histAlarmMessage := alarm.AlarmMessage{Alarm: histAlarm, AlarmAction: alarm.AlarmActionRaise, AlarmTime: (time.Now().UnixNano())}
+               a.activeAlarms = append(a.activeAlarms, histAlarmMessage)
+               a.alarmHistory = append(a.alarmHistory, histAlarmMessage)
        }
 
        // @todo: For now just keep the alarms (both active and history) in-memory. Use SDL later for persistence
@@ -199,19 +208,34 @@ func (a *AlarmManager) StatusCB() bool {
        return a.rmrReady
 }
 
+func (a *AlarmManager) ConfigChangeCB(configparam string) {
+
+       a.maxActiveAlarms = app.Config.GetInt("controls.maxActiveAlarms")
+       a.maxAlarmHistory = app.Config.GetInt("controls.maxAlarmHistory")
+
+       app.Logger.Debug("ConfigChangeCB: maxActiveAlarms %v", a.maxActiveAlarms)
+       app.Logger.Debug("ConfigChangeCB: maxAlarmHistory = %v", a.maxAlarmHistory)
+
+       return
+}
+
 func (a *AlarmManager) Run(sdlcheck bool) {
        app.Logger.SetMdc("alarmManager", fmt.Sprintf("%s:%s", Version, Hash))
        app.SetReadyCB(func(d interface{}) { a.rmrReady = true }, true)
        app.Resource.InjectStatusCb(a.StatusCB)
+       app.AddConfigChangeListener(a.ConfigChangeCB)
 
        app.Resource.InjectRoute("/ric/v1/alarms", a.RaiseAlarm, "POST")
        app.Resource.InjectRoute("/ric/v1/alarms", a.ClearAlarm, "DELETE")
        app.Resource.InjectRoute("/ric/v1/alarms/active", a.GetActiveAlarms, "GET")
        app.Resource.InjectRoute("/ric/v1/alarms/history", a.GetAlarmHistory, "GET")
+       app.Resource.InjectRoute("/ric/v1/alarms/config", a.SetAlarmConfig, "POST")
+       app.Resource.InjectRoute("/ric/v1/alarms/config", a.GetAlarmConfig, "GET")
 
        // Start background timer for re-raising alerts
        a.postClear = sdlcheck
        go a.StartAlertTimer()
+       a.alarmClient, _ = alarm.InitAlarm("SEP", "ALARMMANAGER")
 
        app.RunWithParams(a, sdlcheck)
 }
@@ -226,13 +250,15 @@ func NewAlarmManager(amHost string, alertInterval int) *AlarmManager {
        }
 
        return &AlarmManager{
-               rmrReady:      false,
-               amHost:        amHost,
-               amBaseUrl:     viper.GetString("controls.promAlertManager.baseUrl"),
-               amSchemes:     []string{viper.GetString("controls.promAlertManager.schemes")},
-               alertInterval: alertInterval,
-               activeAlarms:  make([]alarm.AlarmMessage, 0),
-               alarmHistory:  make([]alarm.AlarmMessage, 0),
+               rmrReady:        false,
+               amHost:          amHost,
+               amBaseUrl:       viper.GetString("controls.promAlertManager.baseUrl"),
+               amSchemes:       []string{viper.GetString("controls.promAlertManager.schemes")},
+               alertInterval:   alertInterval,
+               activeAlarms:    make([]alarm.AlarmMessage, 0),
+               alarmHistory:    make([]alarm.AlarmMessage, 0),
+               maxActiveAlarms: app.Config.GetInt("controls.maxActiveAlarms"),
+               maxAlarmHistory: app.Config.GetInt("controls.maxAlarmHistory"),
        }
 }
 
index c048f4b..26cf5c5 100755 (executable)
@@ -33,7 +33,7 @@ import (
        "strings"
        "testing"
        "time"
-
+        "github.com/gorilla/mux"
        "gerrit.o-ran-sc.org/r/ric-plt/alarm-go/alarm"
        "gerrit.o-ran-sc.org/r/ric-plt/xapp-frame/pkg/xapp"
        "github.com/prometheus/alertmanager/api/v2/models"
@@ -56,6 +56,7 @@ func TestMain(M *testing.M) {
        }
 
        alarmer, _ = alarm.InitAlarm("my-pod", "my-app")
+       alarmManager.alarmClient = alarmer
        time.Sleep(time.Duration(5) * time.Second)
        eventChan = make(chan string)
 
@@ -130,8 +131,10 @@ func TestAlarmsSuppresedSucess(t *testing.T) {
        assert.Nil(t, alarmer.Raise(a), "raise failed")
 
        VerifyAlarm(t, a, 1)
+       assert.Nil(t, alarmer.Clear(a), "clear failed")
 }
 
+
 func TestInvalidAlarms(t *testing.T) {
        a := alarmer.NewAlarm(1111, alarm.SeverityMajor, "Some App data", "eth 0 1")
        assert.Nil(t, alarmer.Raise(a), "raise failed")
@@ -153,6 +156,38 @@ func TestStatusCallback(t *testing.T) {
        assert.Equal(t, true, alarmManager.StatusCB())
 }
 
+func TestActiveAlarmMaxThresholds(t *testing.T) {
+       xapp.Logger.Info("TestActiveAlarmMaxThresholds")
+       ts := CreatePromAlertSimulator(t, "POST", "/api/v2/alerts", http.StatusOK, models.LabelSet{})
+       alarmManager.maxActiveAlarms = 0
+       alarmManager.maxAlarmHistory = 10
+
+       a := alarmer.NewAlarm(alarm.E2_CONNECTIVITY_LOST_TO_GNODEB, alarm.SeverityCritical, "Some Application data", "eth 0 2")
+       assert.Nil(t, alarmer.Raise(a), "raise failed")
+
+       var alarmConfigParams alarm.AlarmConfigParams
+       req, _ := http.NewRequest("GET", "/ric/v1/alarms/config", nil)
+       req = mux.SetURLVars(req, nil)
+       handleFunc := http.HandlerFunc(alarmManager.GetAlarmConfig)
+       response := executeRequest(req, handleFunc)
+
+       // Check HTTP Status Code
+       checkResponseCode(t, http.StatusOK, response.Code)
+
+       // Decode the json output from handler
+       json.NewDecoder(response.Body).Decode(&alarmConfigParams)
+       if alarmConfigParams.MaxActiveAlarms != 0 || alarmConfigParams.MaxAlarmHistory != 10 {
+               t.Errorf("Incorrect alarm thresholds")
+       }
+
+       time.Sleep(time.Duration(1) * time.Second)
+       alarmManager.maxActiveAlarms = 5000
+       alarmManager.maxAlarmHistory = 20000
+       VerifyAlarm(t, a, 2)
+       VerifyAlarm(t, a, 2)
+       ts.Close()
+}
+
 func VerifyAlarm(t *testing.T, a alarm.Alarm, expectedCount int) string {
        receivedAlert := waitForEvent()
 
@@ -204,3 +239,20 @@ func fireEvent(t *testing.T, body io.ReadCloser) {
 
        eventChan <- fmt.Sprintf("%s", reqBody)
 }
+
+func executeRequest(req *http.Request, handleR http.HandlerFunc) *httptest.ResponseRecorder {
+       rr := httptest.NewRecorder()
+
+       handleR.ServeHTTP(rr, req)
+
+       return rr
+}
+
+func checkResponseCode(t *testing.T, expected, actual int) bool {
+       if expected != actual {
+               t.Errorf("Expected response code %d. Got %d\n", expected, actual)
+               return false
+       }
+       return true
+}
+
index 6463a2c..c455e87 100755 (executable)
@@ -104,3 +104,26 @@ func (a *AlarmManager) HandleViaRmr(d alarm.Alarm, isRaiseAlarm bool) error {
 
        return nil
 }
+
+func (a *AlarmManager) SetAlarmConfig(w http.ResponseWriter, r *http.Request) {
+       var m alarm.AlarmConfigParams
+       if err := json.NewDecoder(r.Body).Decode(&m); err != nil {
+               app.Logger.Error("json.NewDecoder failed: %v", err)
+       } else {
+               a.maxActiveAlarms = m.MaxActiveAlarms
+               a.maxAlarmHistory = m.MaxAlarmHistory
+               app.Logger.Debug("new maxActiveAlarms = %v", a.maxActiveAlarms)
+               app.Logger.Debug("new maxAlarmHistory = %v", a.maxAlarmHistory)
+               a.respondWithJSON(w, http.StatusOK, err)
+       }
+}
+
+func (a *AlarmManager) GetAlarmConfig(w http.ResponseWriter, r *http.Request) {
+       var m alarm.AlarmConfigParams
+
+       m.MaxActiveAlarms = a.maxActiveAlarms
+       m.MaxAlarmHistory = a.maxAlarmHistory
+
+       a.respondWithJSON(w, http.StatusOK, m)
+       return
+}
index 68b9e0d..40a110f 100755 (executable)
@@ -27,15 +27,18 @@ import (
 )
 
 type AlarmManager struct {
-       amHost        string
-       amBaseUrl     string
-       amSchemes     []string
-       alertInterval int
-       activeAlarms  []alarm.AlarmMessage
-       alarmHistory  []alarm.AlarmMessage
-       mutex         sync.Mutex
-       rmrReady      bool
-       postClear     bool
+       amHost          string
+       amBaseUrl       string
+       amSchemes       []string
+       alertInterval   int
+       activeAlarms    []alarm.AlarmMessage
+       alarmHistory    []alarm.AlarmMessage
+       mutex           sync.Mutex
+       rmrReady        bool
+       postClear       bool
+       maxActiveAlarms int
+       maxAlarmHistory int
+       alarmClient     *alarm.RICAlarm
 }
 
 type AlertStatus string
@@ -47,3 +50,4 @@ const (
 
 var Version string
 var Hash string
+
index 5e999ad..ca01b1c 100644 (file)
@@ -70,7 +70,7 @@
     "identifyingInfo": {
       "type": "string",
       "title": "The identifyingInfo schema",
-      "description": "Identifying additional information, which is part of alarm identity.",
+      "description": "Identifying additional information which is part of alarm identity.",
       "default": ""
     },
     "AlarmAction": {