Add a sleep so that the RMR initialization completes correctly
[ric-plt/alarm-go.git] / manager / cmd / manager.go
index 789093c..1909dc0 100755 (executable)
@@ -27,9 +27,11 @@ import (
        "io/ioutil"
        "net/http"
        "os"
+       "strconv"
+       "strings"
        "time"
 
-       "gerrit.o-ran-sc.org/r/ric-plt/alarm-go/alarm"
+       "gerrit.o-ran-sc.org/r/ric-plt/alarm-go.git/alarm"
        app "gerrit.o-ran-sc.org/r/ric-plt/xapp-frame/pkg/xapp"
        clientruntime "github.com/go-openapi/runtime/client"
        "github.com/go-openapi/strfmt"
@@ -39,13 +41,51 @@ import (
        "github.com/spf13/viper"
 )
 
+func (a *AlarmManager) ClearExpiredAlarms(m AlarmNotification, idx int, mLocked bool) bool {
+       d, ok := alarm.RICAlarmDefinitions[m.Alarm.SpecificProblem]
+       if !ok || d.TimeToLive == 0 {
+               return false
+       }
+
+       elapsed := (time.Now().UnixNano() - m.AlarmTime) / 1e9
+       if int(elapsed) >= d.TimeToLive {
+               app.Logger.Info("Alarm (sp=%d id=%d) with TTL=%d expired, clearing ...", m.Alarm.SpecificProblem, m.AlarmId, d.TimeToLive)
+
+               m.AlarmAction = alarm.AlarmActionClear
+               m.AlarmTime = time.Now().UnixNano()
+
+               if !mLocked { // For testing purpose
+                       a.mutex.Lock()
+               }
+               a.ProcessClearAlarm(&m, d, idx)
+               return true
+       }
+       return false
+}
+
+// StartTTLTimer wakes up every interval seconds and clears the active alarms
+// whose time-to-live has expired (see ClearExpiredAlarms). It never returns
+// and is started in its own goroutine by Run.
+func (a *AlarmManager) StartTTLTimer(interval int) {
+       tick := time.Tick(time.Duration(interval) * time.Second)
+       for range tick {
+               a.mutex.Lock()
+               // NOTE(review): the mutex is released while an expired alarm is
+               // being cleared, so a.activeAlarms may change under this range loop
+               // and idx may be stale for the following entries -- confirm that
+               // ProcessClearAlarm tolerates this.
+               for idx, m := range a.activeAlarms {
+                       if a.ClearExpiredAlarms(m, idx, true) {
+                               a.mutex.Lock() // ClearExpiredAlarms unlocks the mutex, so re-lock here
+                               continue
+                       }
+               }
+               a.mutex.Unlock()
+       }
+}
+
 func (a *AlarmManager) StartAlertTimer() {
        tick := time.Tick(time.Duration(a.alertInterval) * time.Millisecond)
        for range tick {
                a.mutex.Lock()
+
+               a.ProcessAlerts()
                for _, m := range a.activeAlarms {
                        app.Logger.Info("Re-raising alarm: %v", m)
-                       a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive, m.AlarmTime))
+                       a.PostAlert(a.GenerateAlertLabels(m.AlarmId, m.Alarm, AlertStatusActive, m.AlarmTime))
                }
                a.mutex.Unlock()
        }
@@ -149,7 +189,7 @@ func (a *AlarmManager) ProcessRaiseAlarm(m *AlarmNotification, alarmDef *alarm.A
        if app.Config.GetBool("controls.noma.enabled") {
                return a.PostAlarm(m)
        }
-       return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive, m.AlarmTime))
+       return a.PostAlert(a.GenerateAlertLabels(m.AlarmId, m.Alarm, AlertStatusActive, m.AlarmTime))
 }
 
 func (a *AlarmManager) ProcessClearAlarm(m *AlarmNotification, alarmDef *alarm.AlarmDefinition, idx int) (*alert.PostAlertsOK, error) {
@@ -230,7 +270,7 @@ func (a *AlarmManager) GenerateThresholdAlarm(sp int, data string) bool {
        thresholdMessage := alarm.AlarmMessage{
                Alarm:       thresholdAlarm,
                AlarmAction: alarm.AlarmActionRaise,
-               AlarmTime:   (time.Now().UnixNano()),
+               AlarmTime:   time.Now().UnixNano(),
        }
        alarmDef := alarm.RICAlarmDefinitions[sp]
        alarmId := a.GenerateAlarmId()
@@ -285,22 +325,29 @@ func (a *AlarmManager) PostAlarm(m *AlarmNotification) (*alert.PostAlertsOK, err
        return nil, err
 }
 
-func (a *AlarmManager) GenerateAlertLabels(newAlarm alarm.Alarm, status AlertStatus, alarmTime int64) (models.LabelSet, models.LabelSet) {
+func (a *AlarmManager) GenerateAlertLabels(alarmId int, newAlarm alarm.Alarm, status AlertStatus, alarmTime int64) (models.LabelSet, models.LabelSet) {
+       if strings.Contains(newAlarm.ApplicationId, "FM") {
+               app.Logger.Info("Alarm '%d' is originated from FM, ignoring ...", alarmId)
+               return models.LabelSet{}, models.LabelSet{}
+       }
+
        alarmDef := alarm.RICAlarmDefinitions[newAlarm.SpecificProblem]
        amLabels := models.LabelSet{
                "status":      string(status),
                "alertname":   alarmDef.AlarmText,
                "severity":    string(newAlarm.PerceivedSeverity),
                "service":     fmt.Sprintf("%s/%s", newAlarm.ManagedObjectId, newAlarm.ApplicationId),
+               "info":        newAlarm.IdentifyingInfo,
                "system_name": "RIC",
        }
        amAnnotations := models.LabelSet{
-               "alarm_id":         fmt.Sprintf("%d", alarmDef.AlarmId),
+               "alarm_id":         fmt.Sprintf("%d", alarmId),
                "specific_problem": fmt.Sprintf("%d", newAlarm.SpecificProblem),
                "event_type":       alarmDef.EventType,
                "identifying_info": newAlarm.IdentifyingInfo,
                "additional_info":  newAlarm.AdditionalInfo,
                "description":      fmt.Sprintf("%s:%s", newAlarm.IdentifyingInfo, newAlarm.AdditionalInfo),
+               "summary":          newAlarm.IdentifyingInfo,
                "instructions":     alarmDef.OperationInstructions,
                "timestamp":        fmt.Sprintf("%s", time.Unix(0, alarmTime).Format("02/01/2006, 15:04:05")),
        }
@@ -314,9 +361,13 @@ func (a *AlarmManager) NewAlertmanagerClient() *client.Alertmanager {
 }
 
 func (a *AlarmManager) PostAlert(amLabels, amAnnotations models.LabelSet) (*alert.PostAlertsOK, error) {
+       if len(amLabels) == 0 || len(amAnnotations) == 0 {
+               return &alert.PostAlertsOK{}, nil
+       }
+
        pa := &models.PostableAlert{
                Alert: models.Alert{
-                       GeneratorURL: strfmt.URI(""),
+                       GeneratorURL: strfmt.URI("http://service-ricplt-alarmmanager-http.ricplt:8080/ric/v1/alarms"),
                        Labels:       amLabels,
                },
                Annotations: amAnnotations,
@@ -326,23 +377,109 @@ func (a *AlarmManager) PostAlert(amLabels, amAnnotations models.LabelSet) (*aler
        app.Logger.Info("Posting alerts: labels: %+v, annotations: %+v", amLabels, amAnnotations)
        ok, err := a.NewAlertmanagerClient().Alert.PostAlerts(alertParams)
        if err != nil {
-               app.Logger.Error("Posting alerts to '%s/%s' failed with error: %v", a.amHost, a.amBaseUrl, err)
+               app.Logger.Error("Posting alerts to '%s/%s' failed: %v", a.amHost, a.amBaseUrl, err)
        }
        return ok, err
 }
 
+func (a *AlarmManager) GetAlerts() (*alert.GetAlertsOK, error) {
+       active := true
+       alertParams := alert.NewGetAlertsParams()
+       alertParams.Active = &active
+       resp, err := a.NewAlertmanagerClient().Alert.GetAlerts(alertParams)
+       if err != nil {
+               app.Logger.Error("Getting alerts from '%s/%s' failed: %v", a.amHost, a.amBaseUrl, err)
+               return resp, nil
+       }
+       app.Logger.Info("GetAlerts: %+v", resp)
+
+       return resp, err
+}
+
+func (a *AlarmManager) ProcessAlerts() {
+       resp, err := a.GetAlerts()
+       if err != nil || resp == nil {
+               app.Logger.Error("Getting alerts from '%s/%s' failed: %v", a.amHost, a.amBaseUrl, err)
+               return
+       }
+
+       var buildAlarm = func(alert *models.GettableAlert) alarm.Alarm {
+               a := alarm.Alarm{ManagedObjectId: "SEP", ApplicationId: "FM"}
+
+               if v, ok := alert.Alert.Labels["specific_problem"]; ok {
+                       sp, _ := strconv.Atoi(v)
+                       a.SpecificProblem = sp
+               }
+
+               if v, ok := alert.Alert.Labels["severity"]; ok {
+                       a.PerceivedSeverity = alarm.Severity(fmt.Sprint(v))
+               }
+
+               if v, ok := alert.Alert.Labels["name"]; ok {
+                       a.AdditionalInfo = v
+               }
+
+               if v, ok := alert.Annotations["description"]; ok {
+                       a.IdentifyingInfo = v
+               }
+
+               return a
+       }
+
+       // Remove cleared alerts first
+       for _, m := range a.activeAlarms {
+               if m.ApplicationId != "FM" {
+                       continue
+               }
+
+               found := false
+               for _, alert := range resp.Payload {
+                       v, ok := alert.Alert.Labels["service"]
+                       if !ok || !strings.Contains(v, "FM") {
+                               continue
+                       }
+
+                       a := buildAlarm(alert)
+                       if m.ManagedObjectId == a.ManagedObjectId && m.ApplicationId == a.ApplicationId &&
+                               m.SpecificProblem == a.SpecificProblem && m.IdentifyingInfo == a.IdentifyingInfo {
+                               found = true
+                               break
+                       }
+               }
+
+               if !found {
+                       m.AlarmAction = alarm.AlarmActionClear
+                       go a.ProcessAlarm(&m)
+               }
+       }
+
+       for _, alert := range resp.Payload {
+               v, ok := alert.Alert.Labels["service"]
+               if ok && strings.Contains(v, "FM") {
+                       m := alarm.AlarmMessage{Alarm: buildAlarm(alert), AlarmAction: alarm.AlarmActionRaise, AlarmTime: time.Now().UnixNano()}
+                       go a.ProcessAlarm(&AlarmNotification{m, alarm.AlarmDefinition{}})
+               }
+       }
+}
+
 func (a *AlarmManager) StatusCB() bool {
        if !a.rmrReady {
                app.Logger.Info("RMR not ready yet!")
        }
-
        return a.rmrReady
 }
 
 func (a *AlarmManager) ConfigChangeCB(configparam string) {
-
        a.maxActiveAlarms = app.Config.GetInt("controls.maxActiveAlarms")
+       if a.maxActiveAlarms == 0 {
+               a.maxActiveAlarms = 5000
+       }
+
        a.maxAlarmHistory = app.Config.GetInt("controls.maxAlarmHistory")
+       if a.maxAlarmHistory == 0 {
+               a.maxAlarmHistory = 20000
+       }
+
        a.alertInterval = viper.GetInt("controls.promAlertManager.alertInterval")
        a.amHost = viper.GetString("controls.promAlertManager.address")
 
@@ -375,6 +512,7 @@ func (a *AlarmManager) ReadAlarmDefinitionFromJson() {
                                        ricAlarmDefintion.OperationInstructions = alarmDefinition.OperationInstructions
                                        ricAlarmDefintion.RaiseDelay = alarmDefinition.RaiseDelay
                                        ricAlarmDefintion.ClearDelay = alarmDefinition.ClearDelay
+                                       ricAlarmDefintion.TimeToLive = alarmDefinition.TimeToLive
                                        alarm.RICAlarmDefinitions[alarmDefinition.AlarmId] = ricAlarmDefintion
                                }
                        }
@@ -390,7 +528,7 @@ func (a *AlarmManager) ReadAlarmInfoFromPersistentVolume() {
        var alarmpersistentinfo AlarmPersistentInfo
        byteValue, rerr := ioutil.ReadFile(a.alarmInfoPvFile)
        if rerr != nil {
-               app.Logger.Error("ararminfo.json file read error %v", rerr)
+               app.Logger.Info("Unable to read alarminfo.json : %v", rerr)
        } else {
                err := json.Unmarshal(byteValue, &alarmpersistentinfo)
                if err != nil {
@@ -410,8 +548,10 @@ func (a *AlarmManager) WriteAlarmInfoToPersistentVolume() {
        alarmpersistentinfo.UniqueAlarmId = a.uniqueAlarmId
        alarmpersistentinfo.ActiveAlarms = make([]AlarmNotification, len(a.activeAlarms))
        alarmpersistentinfo.AlarmHistory = make([]AlarmNotification, len(a.alarmHistory))
+
        copy(alarmpersistentinfo.ActiveAlarms, a.activeAlarms)
        copy(alarmpersistentinfo.AlarmHistory, a.alarmHistory)
+
        wdata, err := json.MarshalIndent(alarmpersistentinfo, "", " ")
        if err != nil {
                app.Logger.Error("alarmpersistentinfo json marshal error %v", err)
@@ -423,7 +563,7 @@ func (a *AlarmManager) WriteAlarmInfoToPersistentVolume() {
        }
 }
 
-func (a *AlarmManager) Run(sdlcheck bool) {
+func (a *AlarmManager) Run(sdlcheck bool, ttlInterval int) {
        app.Logger.SetMdc("alarmManager", fmt.Sprintf("%s:%s", Version, Hash))
        app.SetReadyCB(func(d interface{}) { a.rmrReady = true }, true)
        app.Resource.InjectStatusCb(a.StatusCB)
@@ -432,23 +572,17 @@ func (a *AlarmManager) Run(sdlcheck bool) {
        alarm.RICAlarmDefinitions = make(map[int]*alarm.AlarmDefinition)
        a.ReadAlarmDefinitionFromJson()
 
-       app.Resource.InjectRoute("/ric/v1/alarms", a.RaiseAlarm, "POST")
-       app.Resource.InjectRoute("/ric/v1/alarms", a.ClearAlarm, "DELETE")
-       app.Resource.InjectRoute("/ric/v1/alarms/active", a.GetActiveAlarms, "GET")
-       app.Resource.InjectRoute("/ric/v1/alarms/history", a.GetAlarmHistory, "GET")
-       app.Resource.InjectRoute("/ric/v1/alarms/config", a.SetAlarmConfig, "POST")
-       app.Resource.InjectRoute("/ric/v1/alarms/config", a.GetAlarmConfig, "GET")
-       app.Resource.InjectRoute("/ric/v1/alarms/define", a.SetAlarmDefinition, "POST")
-       app.Resource.InjectRoute("/ric/v1/alarms/define/{alarmId}", a.DeleteAlarmDefinition, "DELETE")
-       app.Resource.InjectRoute("/ric/v1/alarms/define", a.GetAlarmDefinition, "GET")
-       app.Resource.InjectRoute("/ric/v1/alarms/define/{alarmId}", a.GetAlarmDefinition, "GET")
+       a.InjectRoutes()
 
        // Start background timer for re-raising alerts
        go a.StartAlertTimer()
+       go a.StartTTLTimer(ttlInterval)
+
        a.alarmClient, _ = alarm.InitAlarm("SEP", "ALARMMANAGER")
 
        a.ReadAlarmInfoFromPersistentVolume()
 
+    time.Sleep(8 * time.Second)
        app.RunWithParams(a, sdlcheck)
 }
 
@@ -461,6 +595,16 @@ func NewAlarmManager(amHost string, alertInterval int, clearAlarm bool) *AlarmMa
                amHost = viper.GetString("controls.promAlertManager.address")
        }
 
+       maxActiveAlarms := app.Config.GetInt("controls.maxActiveAlarms")
+       if maxActiveAlarms == 0 {
+               maxActiveAlarms = 5000
+       }
+
+       maxAlarmHistory := app.Config.GetInt("controls.maxAlarmHistory")
+       if maxAlarmHistory == 0 {
+               maxAlarmHistory = 20000
+       }
+
        return &AlarmManager{
                rmrReady:               false,
                postClear:              clearAlarm,
@@ -471,8 +615,8 @@ func NewAlarmManager(amHost string, alertInterval int, clearAlarm bool) *AlarmMa
                activeAlarms:           make([]AlarmNotification, 0),
                alarmHistory:           make([]AlarmNotification, 0),
                uniqueAlarmId:          0,
-               maxActiveAlarms:        app.Config.GetInt("controls.maxActiveAlarms"),
-               maxAlarmHistory:        app.Config.GetInt("controls.maxAlarmHistory"),
+               maxActiveAlarms:        maxActiveAlarms,
+               maxAlarmHistory:        maxAlarmHistory,
                exceededActiveAlarmOn:  false,
                exceededAlarmHistoryOn: false,
                alarmInfoPvFile:        app.Config.GetString("controls.alarmInfoPvFile"),
@@ -481,5 +625,5 @@ func NewAlarmManager(amHost string, alertInterval int, clearAlarm bool) *AlarmMa
 
 // Main function
 func main() {
-       NewAlarmManager("", 0, true).Run(true)
+       NewAlarmManager("", 0, true).Run(true, 10)
 }