"io/ioutil"
"net/http"
"os"
+ "strconv"
+ "strings"
"time"
- "gerrit.o-ran-sc.org/r/ric-plt/alarm-go/alarm"
+ "gerrit.o-ran-sc.org/r/ric-plt/alarm-go.git/alarm"
app "gerrit.o-ran-sc.org/r/ric-plt/xapp-frame/pkg/xapp"
clientruntime "github.com/go-openapi/runtime/client"
"github.com/go-openapi/strfmt"
"github.com/spf13/viper"
)
+// ClearExpiredAlarms clears the active alarm m when its alarm definition has a
+// non-zero TimeToLive and at least that many whole seconds have elapsed since
+// m.AlarmTime.
+//
+// idx is the position of m in a.activeAlarms and is passed on to
+// ProcessClearAlarm. mLocked tells whether the caller already holds a.mutex;
+// when it is false the mutex is acquired here before clearing.
+//
+// It returns true when a clear was attempted. In that case a.mutex has been
+// released by ProcessClearAlarm, so a caller that still needs the lock must
+// acquire it again. It returns false, without touching the mutex, when the
+// alarm has no definition, no TTL, or has not expired yet.
+func (a *AlarmManager) ClearExpiredAlarms(m AlarmNotification, idx int, mLocked bool) bool {
+	d, ok := alarm.RICAlarmDefinitions[m.Alarm.SpecificProblem]
+	if !ok || d == nil || d.TimeToLive == 0 {
+		return false
+	}
+
+	elapsed := (time.Now().UnixNano() - m.AlarmTime) / int64(time.Second)
+	if int(elapsed) < d.TimeToLive {
+		return false
+	}
+
+	app.Logger.Info("Alarm (sp=%d id=%d) with TTL=%d expired, clearing ...", m.Alarm.SpecificProblem, m.AlarmId, d.TimeToLive)
+
+	// m is a copy, so turning it into a clear notification does not modify
+	// the entry stored in a.activeAlarms.
+	m.AlarmAction = alarm.AlarmActionClear
+	m.AlarmTime = time.Now().UnixNano()
+
+	if !mLocked { // For testing purpose
+		a.mutex.Lock()
+	}
+
+	// The result is still "true" on failure: the mutex has been released
+	// either way and the caller relies on that signal to re-lock.
+	if _, err := a.ProcessClearAlarm(&m, d, idx); err != nil {
+		app.Logger.Error("Clearing expired alarm (sp=%d id=%d) failed: %v", m.Alarm.SpecificProblem, m.AlarmId, err)
+	}
+	return true
+}
+
+func (a *AlarmManager) StartTTLTimer(interval int) {
+ tick := time.Tick(time.Duration(interval) * time.Second)
+ for range tick {
+ a.mutex.Lock()
+ for idx, m := range a.activeAlarms {
+ if a.ClearExpiredAlarms(m, idx, true) {
+ a.mutex.Lock() // ClearExpiredAlarms unlocks the mutex, so re-lock here
+ continue
+ }
+ }
+ a.mutex.Unlock()
+ }
+}
+
func (a *AlarmManager) StartAlertTimer() {
tick := time.Tick(time.Duration(a.alertInterval) * time.Millisecond)
for range tick {
a.mutex.Lock()
+
+ a.ProcessAlerts()
for _, m := range a.activeAlarms {
app.Logger.Info("Re-raising alarm: %v", m)
- a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive, m.AlarmTime))
+ a.PostAlert(a.GenerateAlertLabels(m.AlarmId, m.Alarm, AlertStatusActive, m.AlarmTime))
}
a.mutex.Unlock()
}
}
// Clear alarm if found from active alarm list
- if m.AlarmAction == alarm.AlarmActionClear {
- if found {
- if a.ProcessClearAlarm(m, alarmDef, idx) == false {
- return nil, nil
- }
- if a.postClear {
- a.mutex.Unlock()
-
- // Send alarm notification to NOMA, if enabled
- if app.Config.GetBool("controls.noma.enabled") {
- m.PerceivedSeverity = alarm.SeverityCleared
- return a.PostAlarm(m)
- }
- return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusResolved, m.AlarmTime))
- }
- }
- app.Logger.Info("No matching active alarm found, suppressing ...")
- a.mutex.Unlock()
- return nil, nil
+ if found && m.AlarmAction == alarm.AlarmActionClear {
+ return a.ProcessClearAlarm(m, alarmDef, idx)
}
// New alarm -> update active alarms and post to Alert Manager
if m.AlarmAction == alarm.AlarmActionRaise {
- if a.ProcessRaiseAlarm(m, alarmDef) == false {
- return nil, nil
- }
- // Send alarm notification to NOMA, if enabled
- if app.Config.GetBool("controls.noma.enabled") {
- return a.PostAlarm(m)
- }
- return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive, m.AlarmTime))
+ return a.ProcessRaiseAlarm(m, alarmDef)
}
a.mutex.Unlock()
return nil, nil
}
-func (a *AlarmManager)ProcessRaiseAlarm(m *AlarmNotification, alarmDef *alarm.AlarmDefinition) bool {
+func (a *AlarmManager) ProcessRaiseAlarm(m *AlarmNotification, alarmDef *alarm.AlarmDefinition) (*alert.PostAlertsOK, error) {
app.Logger.Debug("Raise alarmDef.RaiseDelay = %v, AlarmNotification = %v", alarmDef.RaiseDelay, *m)
+
// RaiseDelay > 0 in an alarm object in active alarm table indicates that raise delay is still ongoing for the alarm
m.AlarmDefinition.RaiseDelay = alarmDef.RaiseDelay
a.UpdateAlarmFields(a.GenerateAlarmId(), m)
a.UpdateActiveAlarmList(m)
a.mutex.Unlock()
+
if alarmDef.RaiseDelay > 0 {
timerDelay(alarmDef.RaiseDelay)
a.mutex.Lock()
} else {
app.Logger.Debug("Alarm deleted during raise delay. AlarmNotification = %v", *m)
a.mutex.Unlock()
- return false
+ return nil, nil
}
}
+
m.AlarmDefinition.RaiseDelay = 0
a.UpdateAlarmHistoryList(m)
- a.WriteAlarmInfoToPersistentVolume()
- return true
+ a.WriteAlarmInfoToPersistentVolume()
+
+ // Send alarm notification to NOMA, if enabled
+ if app.Config.GetBool("controls.noma.enabled") {
+ return a.PostAlarm(m)
+ }
+ return a.PostAlert(a.GenerateAlertLabels(m.AlarmId, m.Alarm, AlertStatusActive, m.AlarmTime))
}
-func (a *AlarmManager)ProcessClearAlarm(m *AlarmNotification, alarmDef *alarm.AlarmDefinition, idx int) bool {
+func (a *AlarmManager) ProcessClearAlarm(m *AlarmNotification, alarmDef *alarm.AlarmDefinition, idx int) (*alert.PostAlertsOK, error) {
app.Logger.Debug("Clear alarmDef.ClearDelay = %v, AlarmNotification = %v", alarmDef.ClearDelay, *m)
if alarmDef.ClearDelay > 0 {
a.mutex.Unlock()
var found bool
idx, found = a.IsMatchFound(m.Alarm)
if !found {
- app.Logger.Debug("Alarm not anymore in the active alarms table. AlarmNotification = %v", *m)
a.mutex.Unlock()
- return false
+ return nil, nil
}
}
a.UpdateAlarmFields(a.activeAlarms[idx].AlarmId, m)
if a.exceededAlarmHistoryOn && m.Alarm.SpecificProblem == alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD {
a.exceededAlarmHistoryOn = false
}
- a.WriteAlarmInfoToPersistentVolume()
- return true
+ a.WriteAlarmInfoToPersistentVolume()
+
+ a.mutex.Unlock()
+ if a.postClear && app.Config.GetBool("controls.noma.enabled") {
+ m.PerceivedSeverity = alarm.SeverityCleared
+ return a.PostAlarm(m)
+ }
+ return nil, nil
}
func timerDelay(delay int) {
thresholdMessage := alarm.AlarmMessage{
Alarm: thresholdAlarm,
AlarmAction: alarm.AlarmActionRaise,
- AlarmTime: (time.Now().UnixNano()),
+ AlarmTime: time.Now().UnixNano(),
}
alarmDef := alarm.RICAlarmDefinitions[sp]
alarmId := a.GenerateAlarmId()
return nil, err
}
-func (a *AlarmManager) GenerateAlertLabels(newAlarm alarm.Alarm, status AlertStatus, alarmTime int64) (models.LabelSet, models.LabelSet) {
+func (a *AlarmManager) GenerateAlertLabels(alarmId int, newAlarm alarm.Alarm, status AlertStatus, alarmTime int64) (models.LabelSet, models.LabelSet) {
+ if strings.Contains(newAlarm.ApplicationId, "FM") {
+ app.Logger.Info("Alarm '%d' is originated from FM, ignoring ...", alarmId)
+ return models.LabelSet{}, models.LabelSet{}
+ }
+
alarmDef := alarm.RICAlarmDefinitions[newAlarm.SpecificProblem]
amLabels := models.LabelSet{
"status": string(status),
"alertname": alarmDef.AlarmText,
"severity": string(newAlarm.PerceivedSeverity),
"service": fmt.Sprintf("%s/%s", newAlarm.ManagedObjectId, newAlarm.ApplicationId),
+ "info": newAlarm.IdentifyingInfo,
"system_name": "RIC",
}
amAnnotations := models.LabelSet{
- "alarm_id": fmt.Sprintf("%d", alarmDef.AlarmId),
+ "alarm_id": fmt.Sprintf("%d", alarmId),
"specific_problem": fmt.Sprintf("%d", newAlarm.SpecificProblem),
"event_type": alarmDef.EventType,
"identifying_info": newAlarm.IdentifyingInfo,
"additional_info": newAlarm.AdditionalInfo,
"description": fmt.Sprintf("%s:%s", newAlarm.IdentifyingInfo, newAlarm.AdditionalInfo),
+ "summary": newAlarm.IdentifyingInfo,
"instructions": alarmDef.OperationInstructions,
"timestamp": fmt.Sprintf("%s", time.Unix(0, alarmTime).Format("02/01/2006, 15:04:05")),
}
return amLabels, amAnnotations
}
-func (a *AlarmManager) NewAlertmanagerClient() *client.Alertmanager {
+func (a *AlarmManager) NewAlertmanagerClient() *client.AlertmanagerAPI {
cr := clientruntime.New(a.amHost, a.amBaseUrl, a.amSchemes)
return client.New(cr, strfmt.Default)
}
func (a *AlarmManager) PostAlert(amLabels, amAnnotations models.LabelSet) (*alert.PostAlertsOK, error) {
+ if len(amLabels) == 0 || len(amAnnotations) == 0 {
+ return &alert.PostAlertsOK{}, nil
+ }
+
pa := &models.PostableAlert{
Alert: models.Alert{
- GeneratorURL: strfmt.URI(""),
+ GeneratorURL: strfmt.URI("http://service-ricplt-alarmmanager-http.ricplt:8080/ric/v1/alarms"),
Labels: amLabels,
},
Annotations: amAnnotations,
app.Logger.Info("Posting alerts: labels: %+v, annotations: %+v", amLabels, amAnnotations)
ok, err := a.NewAlertmanagerClient().Alert.PostAlerts(alertParams)
if err != nil {
- app.Logger.Error("Posting alerts to '%s/%s' failed with error: %v", a.amHost, a.amBaseUrl, err)
+ app.Logger.Error("Posting alerts to '%s/%s' failed: %v", a.amHost, a.amBaseUrl, err)
}
return ok, err
}
+// GetAlerts fetches alerts from the Alertmanager with the "active" filter set
+// to true.
+//
+// On failure the error is logged and returned together with a nil response,
+// so callers can tell a failed request from a successful one.
+func (a *AlarmManager) GetAlerts() (*alert.GetAlertsOK, error) {
+	active := true
+	alertParams := alert.NewGetAlertsParams()
+	alertParams.Active = &active
+
+	resp, err := a.NewAlertmanagerClient().Alert.GetAlerts(alertParams)
+	if err != nil {
+		app.Logger.Error("Getting alerts from '%s/%s' failed: %v", a.amHost, a.amBaseUrl, err)
+		return nil, err
+	}
+	app.Logger.Info("GetAlerts: %+v", resp)
+
+	return resp, nil
+}
+
+func (a *AlarmManager) ProcessAlerts() {
+ resp, err := a.GetAlerts()
+ if err != nil || resp == nil {
+ app.Logger.Error("Getting alerts from '%s/%s' failed: %v", a.amHost, a.amBaseUrl, err)
+ return
+ }
+
+ var buildAlarm = func(alert *models.GettableAlert) alarm.Alarm {
+ a := alarm.Alarm{ManagedObjectId: "SEP", ApplicationId: "FM"}
+
+ if v, ok := alert.Alert.Labels["specific_problem"]; ok {
+ sp, _ := strconv.Atoi(v)
+ a.SpecificProblem = sp
+ }
+
+ if v, ok := alert.Alert.Labels["severity"]; ok {
+ a.PerceivedSeverity = alarm.Severity(fmt.Sprint(v))
+ }
+
+ if v, ok := alert.Alert.Labels["name"]; ok {
+ a.AdditionalInfo = v
+ }
+
+ if v, ok := alert.Annotations["description"]; ok {
+ a.IdentifyingInfo = v
+ }
+
+ return a
+ }
+
+ // Remove cleared alerts first
+ for _, m := range a.activeAlarms {
+ if m.ApplicationId != "FM" {
+ continue
+ }
+
+ found := false
+ for _, alert := range resp.Payload {
+ v, ok := alert.Alert.Labels["service"]
+ if !ok || !strings.Contains(v, "FM") {
+ continue
+ }
+
+ a := buildAlarm(alert)
+ if m.ManagedObjectId == a.ManagedObjectId && m.ApplicationId == a.ApplicationId &&
+ m.SpecificProblem == a.SpecificProblem && m.IdentifyingInfo == a.IdentifyingInfo {
+ found = true
+ break
+ }
+ }
+
+ if !found {
+ m.AlarmAction = alarm.AlarmActionClear
+ go a.ProcessAlarm(&m)
+ }
+ }
+
+ for _, alert := range resp.Payload {
+ v, ok := alert.Alert.Labels["service"]
+ if ok && strings.Contains(v, "FM") {
+ m := alarm.AlarmMessage{Alarm: buildAlarm(alert), AlarmAction: alarm.AlarmActionRaise, AlarmTime: time.Now().UnixNano()}
+ go a.ProcessAlarm(&AlarmNotification{m, alarm.AlarmDefinition{}})
+ }
+ }
+}
+
func (a *AlarmManager) StatusCB() bool {
if !a.rmrReady {
app.Logger.Info("RMR not ready yet!")
}
-
return a.rmrReady
}
func (a *AlarmManager) ConfigChangeCB(configparam string) {
-
a.maxActiveAlarms = app.Config.GetInt("controls.maxActiveAlarms")
+ if a.maxActiveAlarms == 0 {
+ a.maxActiveAlarms = 5000
+ }
+
a.maxAlarmHistory = app.Config.GetInt("controls.maxAlarmHistory")
+ if a.maxAlarmHistory == 0 {
+ a.maxAlarmHistory = 20000
+ }
+
a.alertInterval = viper.GetInt("controls.promAlertManager.alertInterval")
a.amHost = viper.GetString("controls.promAlertManager.address")
ricAlarmDefintion.OperationInstructions = alarmDefinition.OperationInstructions
ricAlarmDefintion.RaiseDelay = alarmDefinition.RaiseDelay
ricAlarmDefintion.ClearDelay = alarmDefinition.ClearDelay
+ ricAlarmDefintion.TimeToLive = alarmDefinition.TimeToLive
alarm.RICAlarmDefinitions[alarmDefinition.AlarmId] = ricAlarmDefintion
}
}
var alarmpersistentinfo AlarmPersistentInfo
byteValue, rerr := ioutil.ReadFile(a.alarmInfoPvFile)
if rerr != nil {
- app.Logger.Error("ararminfo.json file read error %v", rerr)
+ app.Logger.Info("Unable to read alarminfo.json : %v", rerr)
} else {
err := json.Unmarshal(byteValue, &alarmpersistentinfo)
if err != nil {
alarmpersistentinfo.UniqueAlarmId = a.uniqueAlarmId
alarmpersistentinfo.ActiveAlarms = make([]AlarmNotification, len(a.activeAlarms))
alarmpersistentinfo.AlarmHistory = make([]AlarmNotification, len(a.alarmHistory))
+
copy(alarmpersistentinfo.ActiveAlarms, a.activeAlarms)
copy(alarmpersistentinfo.AlarmHistory, a.alarmHistory)
+
wdata, err := json.MarshalIndent(alarmpersistentinfo, "", " ")
if err != nil {
app.Logger.Error("alarmpersistentinfo json marshal error %v", err)
}
}
-func (a *AlarmManager) Run(sdlcheck bool) {
+func (a *AlarmManager) Run(sdlcheck bool, ttlInterval int) {
app.Logger.SetMdc("alarmManager", fmt.Sprintf("%s:%s", Version, Hash))
app.SetReadyCB(func(d interface{}) { a.rmrReady = true }, true)
app.Resource.InjectStatusCb(a.StatusCB)
alarm.RICAlarmDefinitions = make(map[int]*alarm.AlarmDefinition)
a.ReadAlarmDefinitionFromJson()
- app.Resource.InjectRoute("/ric/v1/alarms", a.RaiseAlarm, "POST")
- app.Resource.InjectRoute("/ric/v1/alarms", a.ClearAlarm, "DELETE")
- app.Resource.InjectRoute("/ric/v1/alarms/active", a.GetActiveAlarms, "GET")
- app.Resource.InjectRoute("/ric/v1/alarms/history", a.GetAlarmHistory, "GET")
- app.Resource.InjectRoute("/ric/v1/alarms/config", a.SetAlarmConfig, "POST")
- app.Resource.InjectRoute("/ric/v1/alarms/config", a.GetAlarmConfig, "GET")
- app.Resource.InjectRoute("/ric/v1/alarms/define", a.SetAlarmDefinition, "POST")
- app.Resource.InjectRoute("/ric/v1/alarms/define/{alarmId}", a.DeleteAlarmDefinition, "DELETE")
- app.Resource.InjectRoute("/ric/v1/alarms/define", a.GetAlarmDefinition, "GET")
- app.Resource.InjectRoute("/ric/v1/alarms/define/{alarmId}", a.GetAlarmDefinition, "GET")
+ a.InjectRoutes()
// Start background timer for re-raising alerts
go a.StartAlertTimer()
+ go a.StartTTLTimer(ttlInterval)
+
a.alarmClient, _ = alarm.InitAlarm("SEP", "ALARMMANAGER")
a.ReadAlarmInfoFromPersistentVolume()
- app.RunWithParams(a, sdlcheck)
+ time.Sleep(8 * time.Second)
+ app.RunWithRunParams(a, app.RunParams{SdlCheck: sdlcheck, DisableAlarmClient: true})
}
func NewAlarmManager(amHost string, alertInterval int, clearAlarm bool) *AlarmManager {
amHost = viper.GetString("controls.promAlertManager.address")
}
+ maxActiveAlarms := app.Config.GetInt("controls.maxActiveAlarms")
+ if maxActiveAlarms == 0 {
+ maxActiveAlarms = 5000
+ }
+
+ maxAlarmHistory := app.Config.GetInt("controls.maxAlarmHistory")
+ if maxAlarmHistory == 0 {
+ maxAlarmHistory = 20000
+ }
+
return &AlarmManager{
rmrReady: false,
postClear: clearAlarm,
activeAlarms: make([]AlarmNotification, 0),
alarmHistory: make([]AlarmNotification, 0),
uniqueAlarmId: 0,
- maxActiveAlarms: app.Config.GetInt("controls.maxActiveAlarms"),
- maxAlarmHistory: app.Config.GetInt("controls.maxAlarmHistory"),
+ maxActiveAlarms: maxActiveAlarms,
+ maxAlarmHistory: maxAlarmHistory,
exceededActiveAlarmOn: false,
exceededAlarmHistoryOn: false,
alarmInfoPvFile: app.Config.GetString("controls.alarmInfoPvFile"),
// Main function
func main() {
- NewAlarmManager("", 0, true).Run(true)
+ NewAlarmManager("", 0, true).Run(true, 10)
}