"github.com/spf13/viper"
)
+// ClearExpiredAlarms clears the active alarm m, located at position idx of the
+// active alarm list, once its time-to-live has run out.
+//
+// The alarm definition is looked up by m.Alarm.SpecificProblem. Nothing happens
+// (and false is returned) when no definition exists, when the definition's
+// TimeToLive is 0, or when fewer than TimeToLive whole seconds have passed
+// since m.AlarmTime.
+//
+// Locking: pass mLocked=true when the caller already holds a.mutex; with
+// mLocked=false the mutex is acquired here. ProcessClearAlarm releases the
+// mutex, so whenever this method returns true the mutex is NOT held anymore and
+// a caller that needs it must lock again. When false is returned the mutex
+// state is left untouched.
+//
+// m is received by value: the clear action and clear time are set on the local
+// copy that is handed to ProcessClearAlarm.
+func (a *AlarmManager) ClearExpiredAlarms(m AlarmNotification, idx int, mLocked bool) bool {
+	d, ok := alarm.RICAlarmDefinitions[m.Alarm.SpecificProblem]
+	if !ok || d.TimeToLive == 0 {
+		return false
+	}
+
+	// AlarmTime is in nanoseconds; TimeToLive is compared in whole seconds.
+	elapsed := (time.Now().UnixNano() - m.AlarmTime) / 1e9
+	if int(elapsed) < d.TimeToLive {
+		return false
+	}
+
+	app.Logger.Info("Alarm (sp=%d id=%d) with TTL=%d expired, clearing ...", m.Alarm.SpecificProblem, m.AlarmId, d.TimeToLive)
+
+	m.AlarmAction = alarm.AlarmActionClear
+	m.AlarmTime = time.Now().UnixNano()
+
+	if !mLocked { // For testing purpose
+		a.mutex.Lock()
+	}
+
+	// Do not drop a failed clear notification silently. The result is still
+	// "true" because the mutex has been released by ProcessClearAlarm and the
+	// caller relies on that signal to re-lock.
+	if _, err := a.ProcessClearAlarm(&m, d, idx); err != nil {
+		app.Logger.Error("Clearing expired alarm (sp=%d id=%d) failed: %v", m.Alarm.SpecificProblem, m.AlarmId, err)
+	}
+	return true
+}
+
+// StartTTLTimer scans the active alarm list every interval seconds and clears
+// the alarms whose time-to-live has expired (see ClearExpiredAlarms). It loops
+// for as long as the ticker delivers ticks, so it is meant to be started in
+// its own goroutine.
+//
+// The list is walked by index against its current length instead of ranging
+// over it: ClearExpiredAlarms releases a.mutex whenever it clears an alarm, so
+// the list can change in the middle of a scan and a captured element or index
+// would be stale.
+func (a *AlarmManager) StartTTLTimer(interval int) {
+	tick := time.Tick(time.Duration(interval) * time.Second)
+	for range tick {
+		a.mutex.Lock()
+		for idx := 0; idx < len(a.activeAlarms); idx++ {
+			sizeBefore := len(a.activeAlarms)
+			// Always hand over the live entry read under the lock.
+			if !a.ClearExpiredAlarms(a.activeAlarms[idx], idx, true) {
+				continue
+			}
+			a.mutex.Lock() // ClearExpiredAlarms unlocks the mutex, so re-lock here
+
+			// If the list shrank, a different alarm presumably occupies this
+			// slot now; stay on the same index so that it is not skipped.
+			if len(a.activeAlarms) < sizeBefore {
+				idx--
+			}
+		}
+		a.mutex.Unlock()
+	}
+}
+
// StartAlertTimer re-posts an alert for every active alarm once per
// a.alertInterval milliseconds. a.mutex is held while the active alarm list is
// walked. The method loops for as long as the ticker delivers ticks.
func (a *AlarmManager) StartAlertTimer() {
tick := time.Tick(time.Duration(a.alertInterval) * time.Millisecond)
for range tick {
a.mutex.Lock()
for _, m := range a.activeAlarms {
app.Logger.Info("Re-raising alarm: %v", m)
// The values returned by PostAlert are not inspected here.
- a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive, m.AlarmTime))
+ a.PostAlert(a.GenerateAlertLabels(m.AlarmId, m.Alarm, AlertStatusActive, m.AlarmTime))
}
a.mutex.Unlock()
}
}
// Clear the alarm if it is found in the active alarm list
- if m.AlarmAction == alarm.AlarmActionClear {
- if found {
- if a.ProcessClearAlarm(m, alarmDef, idx) == false {
- return nil, nil
- }
- if a.postClear {
- a.mutex.Unlock()
-
- // Send alarm notification to NOMA, if enabled
- if app.Config.GetBool("controls.noma.enabled") {
- m.PerceivedSeverity = alarm.SeverityCleared
- return a.PostAlarm(m)
- }
- return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusResolved, m.AlarmTime))
- }
- }
- app.Logger.Info("No matching active alarm found, suppressing ...")
- a.mutex.Unlock()
- return nil, nil
+ if found && m.AlarmAction == alarm.AlarmActionClear {
+ return a.ProcessClearAlarm(m, alarmDef, idx)
}
// New alarm -> update active alarms and post to Alert Manager
if m.AlarmAction == alarm.AlarmActionRaise {
- if a.ProcessRaiseAlarm(m, alarmDef) == false {
- return nil, nil
- }
- // Send alarm notification to NOMA, if enabled
- if app.Config.GetBool("controls.noma.enabled") {
- return a.PostAlarm(m)
- }
- return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive, m.AlarmTime))
+ return a.ProcessRaiseAlarm(m, alarmDef)
}
a.mutex.Unlock()
return nil, nil
}
// ProcessRaiseAlarm handles a raised alarm m described by alarmDef.
//
// It passes a newly generated alarm id to UpdateAlarmFields, adds m to the
// active alarm list and then releases a.mutex (the caller is expected to hold
// it on entry - TODO confirm against all callers). When alarmDef.RaiseDelay is
// greater than 0 it waits via timerDelay and takes the mutex again; if the
// alarm was deleted during that delay it unlocks and returns nil, nil.
//
// Otherwise the alarm's RaiseDelay is reset to 0, the alarm history and the
// persistent volume file are updated, and the alarm is published: the result
// of PostAlarm is returned when "controls.noma.enabled" is set, else the
// result of PostAlert with labels for an active alert.
-func (a *AlarmManager)ProcessRaiseAlarm(m *AlarmNotification, alarmDef *alarm.AlarmDefinition) bool {
+func (a *AlarmManager) ProcessRaiseAlarm(m *AlarmNotification, alarmDef *alarm.AlarmDefinition) (*alert.PostAlertsOK, error) {
app.Logger.Debug("Raise alarmDef.RaiseDelay = %v, AlarmNotification = %v", alarmDef.RaiseDelay, *m)
+
// RaiseDelay > 0 in an alarm object in active alarm table indicates that raise delay is still ongoing for the alarm
m.AlarmDefinition.RaiseDelay = alarmDef.RaiseDelay
a.UpdateAlarmFields(a.GenerateAlarmId(), m)
a.UpdateActiveAlarmList(m)
a.mutex.Unlock()
+
if alarmDef.RaiseDelay > 0 {
timerDelay(alarmDef.RaiseDelay)
a.mutex.Lock()
} else {
app.Logger.Debug("Alarm deleted during raise delay. AlarmNotification = %v", *m)
a.mutex.Unlock()
- return false
+ return nil, nil
}
}
+
// The raise delay (if any) is over: mark it as no longer ongoing.
m.AlarmDefinition.RaiseDelay = 0
a.UpdateAlarmHistoryList(m)
- a.WriteAlarmInfoToPersistentVolume()
- return true
+ a.WriteAlarmInfoToPersistentVolume()
+
+ // Send alarm notification to NOMA, if enabled
+ if app.Config.GetBool("controls.noma.enabled") {
+ return a.PostAlarm(m)
+ }
+ return a.PostAlert(a.GenerateAlertLabels(m.AlarmId, m.Alarm, AlertStatusActive, m.AlarmTime))
}
// ProcessClearAlarm handles the clearing of the active alarm at position idx.
//
// The caller is expected to hold a.mutex on entry (TODO confirm against all
// callers); the mutex is released on every return path shown here. When
// alarmDef.ClearDelay is greater than 0 the mutex is dropped for the delay and
// idx is looked up again with IsMatchFound; if the alarm is no longer found
// the method returns nil, nil.
//
// It returns the result of PostAlarm, with the severity set to cleared, only
// when both a.postClear and "controls.noma.enabled" are set; in every other
// case it returns nil, nil.
// NOTE(review): no resolved alert is sent through PostAlert from this method
// when NOMA is disabled - confirm that this is intended.
-func (a *AlarmManager)ProcessClearAlarm(m *AlarmNotification, alarmDef *alarm.AlarmDefinition, idx int) bool {
+func (a *AlarmManager) ProcessClearAlarm(m *AlarmNotification, alarmDef *alarm.AlarmDefinition, idx int) (*alert.PostAlertsOK, error) {
app.Logger.Debug("Clear alarmDef.ClearDelay = %v, AlarmNotification = %v", alarmDef.ClearDelay, *m)
if alarmDef.ClearDelay > 0 {
a.mutex.Unlock()
var found bool
// The index passed in may be outdated after the delay, so resolve it again.
idx, found = a.IsMatchFound(m.Alarm)
if !found {
- app.Logger.Debug("Alarm not anymore in the active alarms table. AlarmNotification = %v", *m)
a.mutex.Unlock()
- return false
+ return nil, nil
}
}
// Pass the id of the matching active alarm on to UpdateAlarmFields for m.
a.UpdateAlarmFields(a.activeAlarms[idx].AlarmId, m)
if a.exceededAlarmHistoryOn && m.Alarm.SpecificProblem == alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD {
a.exceededAlarmHistoryOn = false
}
- a.WriteAlarmInfoToPersistentVolume()
- return true
+ a.WriteAlarmInfoToPersistentVolume()
+
+ a.mutex.Unlock()
+ if a.postClear && app.Config.GetBool("controls.noma.enabled") {
+ m.PerceivedSeverity = alarm.SeverityCleared
+ return a.PostAlarm(m)
+ }
+ return nil, nil
}
func timerDelay(delay int) {
thresholdMessage := alarm.AlarmMessage{
Alarm: thresholdAlarm,
AlarmAction: alarm.AlarmActionRaise,
- AlarmTime: (time.Now().UnixNano()),
+ AlarmTime: time.Now().UnixNano(),
}
alarmDef := alarm.RICAlarmDefinitions[sp]
alarmId := a.GenerateAlarmId()
return nil, err
}
-func (a *AlarmManager) GenerateAlertLabels(newAlarm alarm.Alarm, status AlertStatus, alarmTime int64) (models.LabelSet, models.LabelSet) {
+func (a *AlarmManager) GenerateAlertLabels(alarmId int, newAlarm alarm.Alarm, status AlertStatus, alarmTime int64) (models.LabelSet, models.LabelSet) {
alarmDef := alarm.RICAlarmDefinitions[newAlarm.SpecificProblem]
amLabels := models.LabelSet{
"status": string(status),
"alertname": alarmDef.AlarmText,
"severity": string(newAlarm.PerceivedSeverity),
"service": fmt.Sprintf("%s/%s", newAlarm.ManagedObjectId, newAlarm.ApplicationId),
+ "info": newAlarm.IdentifyingInfo,
"system_name": "RIC",
}
amAnnotations := models.LabelSet{
- "alarm_id": fmt.Sprintf("%d", alarmDef.AlarmId),
+ "alarm_id": fmt.Sprintf("%d", alarmId),
"specific_problem": fmt.Sprintf("%d", newAlarm.SpecificProblem),
"event_type": alarmDef.EventType,
"identifying_info": newAlarm.IdentifyingInfo,
if !a.rmrReady {
app.Logger.Info("RMR not ready yet!")
}
-
return a.rmrReady
}
func (a *AlarmManager) ConfigChangeCB(configparam string) {
-
a.maxActiveAlarms = app.Config.GetInt("controls.maxActiveAlarms")
+ if a.maxActiveAlarms == 0 {
+ a.maxActiveAlarms = 5000
+ }
+
a.maxAlarmHistory = app.Config.GetInt("controls.maxAlarmHistory")
+ if a.maxAlarmHistory == 0 {
+ a.maxAlarmHistory = 20000
+ }
+
a.alertInterval = viper.GetInt("controls.promAlertManager.alertInterval")
a.amHost = viper.GetString("controls.promAlertManager.address")
ricAlarmDefintion.OperationInstructions = alarmDefinition.OperationInstructions
ricAlarmDefintion.RaiseDelay = alarmDefinition.RaiseDelay
ricAlarmDefintion.ClearDelay = alarmDefinition.ClearDelay
+ ricAlarmDefintion.TimeToLive = alarmDefinition.TimeToLive
alarm.RICAlarmDefinitions[alarmDefinition.AlarmId] = ricAlarmDefintion
}
}
alarmpersistentinfo.UniqueAlarmId = a.uniqueAlarmId
alarmpersistentinfo.ActiveAlarms = make([]AlarmNotification, len(a.activeAlarms))
alarmpersistentinfo.AlarmHistory = make([]AlarmNotification, len(a.alarmHistory))
+
copy(alarmpersistentinfo.ActiveAlarms, a.activeAlarms)
copy(alarmpersistentinfo.AlarmHistory, a.alarmHistory)
+
wdata, err := json.MarshalIndent(alarmpersistentinfo, "", " ")
if err != nil {
app.Logger.Error("alarmpersistentinfo json marshal error %v", err)
}
}
-func (a *AlarmManager) Run(sdlcheck bool) {
+func (a *AlarmManager) Run(sdlcheck bool, ttlInterval int) {
app.Logger.SetMdc("alarmManager", fmt.Sprintf("%s:%s", Version, Hash))
app.SetReadyCB(func(d interface{}) { a.rmrReady = true }, true)
app.Resource.InjectStatusCb(a.StatusCB)
alarm.RICAlarmDefinitions = make(map[int]*alarm.AlarmDefinition)
a.ReadAlarmDefinitionFromJson()
- app.Resource.InjectRoute("/ric/v1/alarms", a.RaiseAlarm, "POST")
- app.Resource.InjectRoute("/ric/v1/alarms", a.ClearAlarm, "DELETE")
- app.Resource.InjectRoute("/ric/v1/alarms/active", a.GetActiveAlarms, "GET")
- app.Resource.InjectRoute("/ric/v1/alarms/history", a.GetAlarmHistory, "GET")
- app.Resource.InjectRoute("/ric/v1/alarms/config", a.SetAlarmConfig, "POST")
- app.Resource.InjectRoute("/ric/v1/alarms/config", a.GetAlarmConfig, "GET")
- app.Resource.InjectRoute("/ric/v1/alarms/define", a.SetAlarmDefinition, "POST")
- app.Resource.InjectRoute("/ric/v1/alarms/define/{alarmId}", a.DeleteAlarmDefinition, "DELETE")
- app.Resource.InjectRoute("/ric/v1/alarms/define", a.GetAlarmDefinition, "GET")
- app.Resource.InjectRoute("/ric/v1/alarms/define/{alarmId}", a.GetAlarmDefinition, "GET")
+ a.InjectRoutes()
// Start background timer for re-raising alerts
go a.StartAlertTimer()
+ go a.StartTTLTimer(ttlInterval)
+
a.alarmClient, _ = alarm.InitAlarm("SEP", "ALARMMANAGER")
a.ReadAlarmInfoFromPersistentVolume()
amHost = viper.GetString("controls.promAlertManager.address")
}
+ maxActiveAlarms := app.Config.GetInt("controls.maxActiveAlarms")
+ if maxActiveAlarms == 0 {
+ maxActiveAlarms = 5000
+ }
+
+ maxAlarmHistory := app.Config.GetInt("controls.maxAlarmHistory")
+ if maxAlarmHistory == 0 {
+ maxAlarmHistory = 20000
+ }
+
return &AlarmManager{
rmrReady: false,
postClear: clearAlarm,
activeAlarms: make([]AlarmNotification, 0),
alarmHistory: make([]AlarmNotification, 0),
uniqueAlarmId: 0,
- maxActiveAlarms: app.Config.GetInt("controls.maxActiveAlarms"),
- maxAlarmHistory: app.Config.GetInt("controls.maxAlarmHistory"),
+ maxActiveAlarms: maxActiveAlarms,
+ maxAlarmHistory: maxAlarmHistory,
exceededActiveAlarmOn: false,
exceededAlarmHistoryOn: false,
alarmInfoPvFile: app.Config.GetString("controls.alarmInfoPvFile"),
// main creates the alarm manager and runs it. The second argument of Run is
// the TTL scan interval that is handed to StartTTLTimer, which interprets it
// in seconds.
func main() {
- NewAlarmManager("", 0, true).Run(true)
+ NewAlarmManager("", 0, true).Run(true, 10)
}