"encoding/json"
"fmt"
"time"
-
+ "os"
+ "gerrit.o-ran-sc.org/r/ric-plt/alarm-go/alarm"
+ app "gerrit.o-ran-sc.org/r/ric-plt/xapp-frame/pkg/xapp"
clientruntime "github.com/go-openapi/runtime/client"
"github.com/go-openapi/strfmt"
"github.com/prometheus/alertmanager/api/v2/client"
"github.com/prometheus/alertmanager/api/v2/client/alert"
"github.com/prometheus/alertmanager/api/v2/models"
"github.com/spf13/viper"
-
- "gerrit.o-ran-sc.org/r/ric-plt/alarm-go/alarm"
- app "gerrit.o-ran-sc.org/r/ric-plt/xapp-frame/pkg/xapp"
+ "io/ioutil"
)
// StartAlertTimer re-posts the currently active alarms as alerts. With
// a.mutex held, it walks a.activeAlarms, logs each entry and passes the labels
// generated for it (status AlertStatusActive) to PostAlert.
//
// NOTE(review): no timer or ticker is visible in this view of the function;
// confirm where the periodic scheduling takes place.
func (a *AlarmManager) StartAlertTimer() {
a.mutex.Lock()
for _, m := range a.activeAlarms {
app.Logger.Info("Re-raising alarm: %v", m)
// Whatever PostAlert returns is not inspected here. The added line also
// forwards the alarm's own AlarmTime to GenerateAlertLabels.
- a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive))
+ a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive, m.AlarmTime))
}
a.mutex.Unlock()
}
// Suppress duplicate alarms
idx, found := a.IsMatchFound(m.Alarm)
- if found && m.AlarmAction != alarm.AlarmActionClear {
+ if found && m.AlarmAction == alarm.AlarmActionRaise {
app.Logger.Info("Duplicate alarm found, suppressing ...")
- return nil, nil
+ if m.PerceivedSeverity == a.activeAlarms[idx].PerceivedSeverity {
+ // Duplicate with same severity found
+ return nil, nil
+ } else {
+ // Remove duplicate with different severity
+ a.activeAlarms = a.RemoveAlarm(a.activeAlarms, idx, "active")
+ }
}
// Clear alarm if found from active alarm list
if m.AlarmAction == alarm.AlarmActionClear {
if found {
+ a.alarmHistory = append(a.alarmHistory, *m)
a.activeAlarms = a.RemoveAlarm(a.activeAlarms, idx, "active")
if a.postClear {
- return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusResolved))
+ return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusResolved, m.AlarmTime))
}
}
app.Logger.Info("No matching active alarm found, suppressing ...")
// New alarm -> update active alarms and post to Alert Manager
if m.AlarmAction == alarm.AlarmActionRaise {
a.UpdateAlarmLists(m)
- return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive))
+ return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive, m.AlarmTime))
}
return nil, nil
a.mutex.Lock()
defer a.mutex.Unlock()
- // If maximum number of active alarms is reached, purge the oldest alarm
- if len(a.activeAlarms) >= viper.GetInt("controls.maxActiveAlarms") {
- a.activeAlarms = a.RemoveAlarm(a.activeAlarms, 0, "active")
+ /* If the maximum number of active alarms is reached, an error is logged and a new alarm indicating the problem is raised.
+ The next attempt to raise that alarm will be suppressed when it is found to be a duplicate. */
+ if len(a.activeAlarms) >= a.maxActiveAlarms {
+ app.Logger.Error("active alarm count exceeded maxActiveAlarms threshold")
+ actAlarm := a.alarmClient.NewAlarm(alarm.ACTIVE_ALARM_EXCEED_MAX_THRESHOLD, alarm.SeverityWarning, "clear alarms or raise threshold", "active alarms full")
+ actAlarmMessage := alarm.AlarmMessage{Alarm: actAlarm, AlarmAction: alarm.AlarmActionRaise, AlarmTime: (time.Now().UnixNano())}
+ a.activeAlarms = append(a.activeAlarms, actAlarmMessage)
+ a.alarmHistory = append(a.alarmHistory, actAlarmMessage)
}
- if len(a.alarmHistory) >= viper.GetInt("controls.maxAlarmHistory") {
- a.alarmHistory = a.RemoveAlarm(a.alarmHistory, 0, "history")
+ if len(a.alarmHistory) >= a.maxAlarmHistory {
+ app.Logger.Error("alarm history count exceeded maxAlarmHistory threshold")
+ histAlarm := a.alarmClient.NewAlarm(alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD, alarm.SeverityWarning, "clear alarms or raise threshold", "alarm history full")
+ histAlarmMessage := alarm.AlarmMessage{Alarm: histAlarm, AlarmAction: alarm.AlarmActionRaise, AlarmTime: (time.Now().UnixNano())}
+ a.activeAlarms = append(a.activeAlarms, histAlarmMessage)
+ a.alarmHistory = append(a.alarmHistory, histAlarmMessage)
}
// @todo: For now just keep the alarms (both active and history) in-memory. Use SDL later for persistence
a.alarmHistory = append(a.alarmHistory, *newAlarm)
}
-func (a *AlarmManager) GenerateAlertLabels(newAlarm alarm.Alarm, status AlertStatus) (models.LabelSet, models.LabelSet) {
+func (a *AlarmManager) GenerateAlertLabels(newAlarm alarm.Alarm, status AlertStatus, alarmTime int64) (models.LabelSet, models.LabelSet) {
alarmDef := alarm.RICAlarmDefinitions[newAlarm.SpecificProblem]
amLabels := models.LabelSet{
"status": string(status),
"additional_info": newAlarm.AdditionalInfo,
"summary": alarmDef.EventType,
"instructions": alarmDef.OperationInstructions,
+ "timestamp": fmt.Sprintf("%s", time.Unix(0, alarmTime).Format("02/01/2006, 15:04:05")),
}
return amLabels, amAnnotations
return a.rmrReady
}
+// ConfigChangeCB refreshes the manager's cached settings from configuration.
+// The active-alarm and alarm-history limits are read through app.Config, the
+// alert interval and the Alert Manager address through viper. Every refreshed
+// value is then written to the debug log. The configparam argument is not used.
+//
+// NOTE(review): the fields are written without taking a.mutex; confirm
+// whether concurrent readers need synchronization.
+func (a *AlarmManager) ConfigChangeCB(configparam string) {
+	// Limits for the active alarm list and the alarm history.
+	a.maxActiveAlarms, a.maxAlarmHistory =
+		app.Config.GetInt("controls.maxActiveAlarms"),
+		app.Config.GetInt("controls.maxAlarmHistory")
+
+	// Alert Manager re-post interval and address.
+	a.alertInterval, a.amHost =
+		viper.GetInt("controls.promAlertManager.alertInterval"),
+		viper.GetString("controls.promAlertManager.address")
+
+	app.Logger.Debug("ConfigChangeCB: maxActiveAlarms %v", a.maxActiveAlarms)
+	app.Logger.Debug("ConfigChangeCB: maxAlarmHistory = %v", a.maxAlarmHistory)
+	app.Logger.Debug("ConfigChangeCB: alertInterval %v", a.alertInterval)
+	app.Logger.Debug("ConfigChangeCB: amHost = %v", a.amHost)
+}
+
+func (a *AlarmManager) ReadAlarmDefinitionFromJson() {
+
+ filename := os.Getenv("DEF_FILE")
+ file, err := ioutil.ReadFile(filename)
+ if err == nil {
+ data := RicAlarmDefinitions{}
+ err = json.Unmarshal([]byte(file), &data)
+ if err == nil {
+ for _, alarmDefinition := range data.AlarmDefinitions {
+ _, exists := alarm.RICAlarmDefinitions[alarmDefinition.AlarmId]
+ if exists {
+ app.Logger.Error("ReadAlarmDefinitionFromJson: alarm definition already exists for %v", alarmDefinition.AlarmId)
+ } else {
+ app.Logger.Debug("ReadAlarmDefinitionFromJson: alarm %v", alarmDefinition.AlarmId)
+ ricAlarmDefintion := new(alarm.AlarmDefinition)
+ ricAlarmDefintion.AlarmId = alarmDefinition.AlarmId
+ ricAlarmDefintion.AlarmText = alarmDefinition.AlarmText
+ ricAlarmDefintion.EventType = alarmDefinition.EventType
+ ricAlarmDefintion.OperationInstructions = alarmDefinition.OperationInstructions
+ alarm.RICAlarmDefinitions[alarmDefinition.AlarmId] = ricAlarmDefintion
+ }
+ }
+ } else {
+ app.Logger.Error("json.Unmarshal failed with error %v", err)
+ }
+ } else {
+ app.Logger.Error("ioutil.ReadFile failed with error %v", err)
+ }
+}
+
func (a *AlarmManager) Run(sdlcheck bool) {
app.Logger.SetMdc("alarmManager", fmt.Sprintf("%s:%s", Version, Hash))
app.SetReadyCB(func(d interface{}) { a.rmrReady = true }, true)
app.Resource.InjectStatusCb(a.StatusCB)
+ app.AddConfigChangeListener(a.ConfigChangeCB)
+
+ alarm.RICAlarmDefinitions = make(map[int]*alarm.AlarmDefinition)
+ a.ReadAlarmDefinitionFromJson()
app.Resource.InjectRoute("/ric/v1/alarms", a.RaiseAlarm, "POST")
app.Resource.InjectRoute("/ric/v1/alarms", a.ClearAlarm, "DELETE")
app.Resource.InjectRoute("/ric/v1/alarms/active", a.GetActiveAlarms, "GET")
app.Resource.InjectRoute("/ric/v1/alarms/history", a.GetAlarmHistory, "GET")
+ app.Resource.InjectRoute("/ric/v1/alarms/config", a.SetAlarmConfig, "POST")
+ app.Resource.InjectRoute("/ric/v1/alarms/config", a.GetAlarmConfig, "GET")
+ app.Resource.InjectRoute("/ric/v1/alarms/define", a.SetAlarmDefinition, "POST")
+ app.Resource.InjectRoute("/ric/v1/alarms/define/{alarmId}", a.DeleteAlarmDefinition, "DELETE")
+ app.Resource.InjectRoute("/ric/v1/alarms/define", a.GetAlarmDefinition, "GET")
+ app.Resource.InjectRoute("/ric/v1/alarms/define/{alarmId}", a.GetAlarmDefinition, "GET")
// Start background timer for re-raising alerts
a.postClear = sdlcheck
go a.StartAlertTimer()
+ a.alarmClient, _ = alarm.InitAlarm("SEP", "ALARMMANAGER")
app.RunWithParams(a, sdlcheck)
}
}
return &AlarmManager{
- rmrReady: false,
- amHost: amHost,
- amBaseUrl: viper.GetString("controls.promAlertManager.baseUrl"),
- amSchemes: []string{viper.GetString("controls.promAlertManager.schemes")},
- alertInterval: alertInterval,
- activeAlarms: make([]alarm.AlarmMessage, 0),
- alarmHistory: make([]alarm.AlarmMessage, 0),
+ rmrReady: false,
+ amHost: amHost,
+ amBaseUrl: viper.GetString("controls.promAlertManager.baseUrl"),
+ amSchemes: []string{viper.GetString("controls.promAlertManager.schemes")},
+ alertInterval: alertInterval,
+ activeAlarms: make([]alarm.AlarmMessage, 0),
+ alarmHistory: make([]alarm.AlarmMessage, 0),
+ maxActiveAlarms: app.Config.GetInt("controls.maxActiveAlarms"),
+ maxAlarmHistory: app.Config.GetInt("controls.maxAlarmHistory"),
}
}