2 * Copyright (c) 2020 AT&T Intellectual Property.
3 * Copyright (c) 2020 Nokia.
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
17 * This source code is part of the near-RT RIC (RAN Intelligent Controller)
18 * platform project (RICP).
27 "gerrit.o-ran-sc.org/r/ric-plt/alarm-go/alarm"
28 app "gerrit.o-ran-sc.org/r/ric-plt/xapp-frame/pkg/xapp"
29 clientruntime "github.com/go-openapi/runtime/client"
30 "github.com/go-openapi/strfmt"
31 "github.com/prometheus/alertmanager/api/v2/client"
32 "github.com/prometheus/alertmanager/api/v2/client/alert"
33 "github.com/prometheus/alertmanager/api/v2/models"
34 "github.com/spf13/viper"
// StartAlertTimer re-posts the active alarms to the alert manager.
// It creates a tick channel with a period of a.alertInterval milliseconds; the
// loop over a.activeAlarms logs every alarm and posts it with status
// AlertStatusActive.
// NOTE(review): the statement that consumes tick and any synchronization
// around a.activeAlarms are not visible in this excerpt - confirm against the
// full file.
// NOTE(review): the values returned by PostAlert are discarded here, so a
// failed re-post is only visible through PostAlert's own error log.
41 func (a *AlarmManager) StartAlertTimer() {
42 tick := time.Tick(time.Duration(a.alertInterval) * time.Millisecond)
45 for _, m := range a.activeAlarms {
46 app.Logger.Info("Re-raising alarm: %v", m)
47 a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive, m.AlarmTime))
// Consume handles one received RMR message.
// The message buffer is released with app.Rmr.Free when Consume returns.
// alarm.RIC_ALARM_UPDATE is the only message type with a visible case.
// NOTE(review): the handler invoked for RIC_ALARM_UPDATE, the branch that owns
// the "Unknown Message Type" log (presumably the default case) and the
// returned error are on lines not visible in this excerpt.
53 func (a *AlarmManager) Consume(rp *app.RMRParams) (err error) {
54 app.Logger.Info("Message received!")
// Deferred so the buffer is freed on every exit path.
56 defer app.Rmr.Free(rp.Mbuf)
58 case alarm.RIC_ALARM_UPDATE:
61 app.Logger.Info("Unknown Message Type '%d', discarding", rp.Mtype)
// HandleAlarms decodes the JSON payload of an RMR message into an
// alarm.AlarmMessage and passes it to ProcessAlarm, wrapped in an
// AlarmNotification with an empty alarm definition. The result of
// ProcessAlarm is returned unchanged.
// A decode failure is logged at error level.
// NOTE(review): the statement following the error log is not visible here;
// presumably the error is returned to the caller - confirm.
67 func (a *AlarmManager) HandleAlarms(rp *app.RMRParams) (*alert.PostAlertsOK, error) {
68 var m alarm.AlarmMessage
69 app.Logger.Info("Received JSON: %s", rp.Payload)
70 if err := json.Unmarshal(rp.Payload, &m); err != nil {
71 app.Logger.Error("json.Unmarshal failed: %v", err)
74 app.Logger.Info("newAlarm: %v", m)
// The definition part is left empty here; ProcessAlarm fills it in through UpdateAlarmFields.
76 return a.ProcessAlarm(&AlarmNotification{m, alarm.AlarmDefinition{}})
// ProcessAlarm applies a raise or clear notification to the in-memory alarm
// lists and forwards it either to NOMA through PostAlarm (when
// "controls.noma.enabled" is set) or to the alert manager through PostAlert.
//
// Behavior visible in this excerpt:
//   - an alarm whose SpecificProblem has no entry in alarm.RICAlarmDefinitions
//     is logged and suppressed;
//   - a raise that matches an active alarm is treated as a duplicate; when the
//     severity differs, the existing entry is removed from the active list;
//   - a clear appends the notification to the history, removes the matching
//     entry from the active list and resets the "exceeded" flags when the
//     cleared alarm is the corresponding threshold alarm;
//   - a raise receives a newly generated alarm id and is stored through
//     UpdateAlarmLists.
//
// NOTE(review): several short lines (return statements, branch headers and
// possibly locking) are not visible in this excerpt, so the exact control flow
// must be confirmed against the full file.
79 func (a *AlarmManager) ProcessAlarm(m *AlarmNotification) (*alert.PostAlertsOK, error) {
81 if _, ok := alarm.RICAlarmDefinitions[m.Alarm.SpecificProblem]; !ok {
82 app.Logger.Warn("Alarm (SP='%d') not recognized, suppressing ...", m.Alarm.SpecificProblem)
87 // Suppress duplicate alarms
88 idx, found := a.IsMatchFound(m.Alarm)
89 if found && m.AlarmAction == alarm.AlarmActionRaise {
90 app.Logger.Info("Duplicate alarm found, suppressing ...")
91 if m.PerceivedSeverity == a.activeAlarms[idx].PerceivedSeverity {
92 // Duplicate with same severity found
96 // Remove duplicate with different severity
97 a.activeAlarms = a.RemoveAlarm(a.activeAlarms, idx, "active")
101 // Clear alarm if found from active alarm list
102 if m.AlarmAction == alarm.AlarmActionClear {
// The clear notification inherits the id of the active alarm it clears.
104 a.UpdateAlarmFields(a.activeAlarms[idx].AlarmId, m)
105 a.alarmHistory = append(a.alarmHistory, *m)
106 a.activeAlarms = a.RemoveAlarm(a.activeAlarms, idx, "active")
107 if (len(a.alarmHistory) >= a.maxAlarmHistory) && (a.exceededAlarmHistoryOn == false) {
108 app.Logger.Warn("alarm history count exceeded maxAlarmHistory threshold")
// NOTE(review): unlike UpdateAlarmLists, the result of GenerateThresholdAlarm
// is not stored in a.exceededAlarmHistoryOn here, so this guard stays true on
// later clears - confirm whether that is intended.
109 a.GenerateThresholdAlarm(alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD, "history")
// Clearing a threshold alarm re-arms the corresponding threshold check.
112 if a.exceededActiveAlarmOn && m.Alarm.SpecificProblem == alarm.ACTIVE_ALARM_EXCEED_MAX_THRESHOLD {
113 a.exceededActiveAlarmOn = false
116 if a.exceededAlarmHistoryOn && m.Alarm.SpecificProblem == alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD {
117 a.exceededAlarmHistoryOn = false
123 // Send alarm notification to NOMA, if enabled
124 if app.Config.GetBool("controls.noma.enabled") {
// NOMA receives the notification with its severity forced to SeverityCleared.
125 m.PerceivedSeverity = alarm.SeverityCleared
126 return a.PostAlarm(m)
128 return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusResolved, m.AlarmTime))
131 app.Logger.Info("No matching active alarm found, suppressing ...")
136 // New alarm -> update active alarms and post to Alert Manager
137 if m.AlarmAction == alarm.AlarmActionRaise {
138 a.UpdateAlarmFields(a.GenerateAlarmId(), m)
139 a.UpdateAlarmLists(m)
142 // Send alarm notification to NOMA, if enabled
143 if app.Config.GetBool("controls.noma.enabled") {
144 return a.PostAlarm(m)
146 return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive, m.AlarmTime))
// IsMatchFound searches a.activeAlarms for an entry whose ManagedObjectId,
// ApplicationId, SpecificProblem and IdentifyingInfo all equal those of
// newAlarm. Severity and additional info take no part in the comparison.
// NOTE(review): the return statements are not visible in this excerpt;
// ProcessAlarm uses the int result as an index into a.activeAlarms and the
// bool as the "found" indicator.
153 func (a *AlarmManager) IsMatchFound(newAlarm alarm.Alarm) (int, bool) {
154 for i, m := range a.activeAlarms {
155 if m.ManagedObjectId == newAlarm.ManagedObjectId && m.ApplicationId == newAlarm.ApplicationId &&
156 m.SpecificProblem == newAlarm.SpecificProblem && m.IdentifyingInfo == newAlarm.IdentifyingInfo {
163 func (a *AlarmManager) RemoveAlarm(alarms []AlarmNotification, i int, listName string) []AlarmNotification {
164 app.Logger.Info("Alarm '%+v' deleted from the '%s' list", alarms[i], listName)
165 copy(alarms[i:], alarms[i+1:])
166 return alarms[:len(alarms)-1]
169 func (a *AlarmManager) GenerateAlarmId() int {
170 a.uniqueAlarmId++ // @todo: generate a unique ID
171 return a.uniqueAlarmId
174 func (a *AlarmManager) UpdateAlarmFields(alarmId int, newAlarm *AlarmNotification) {
175 alarmDef := alarm.RICAlarmDefinitions[newAlarm.SpecificProblem]
176 newAlarm.AlarmId = alarmId
177 newAlarm.AlarmText = alarmDef.AlarmText
178 newAlarm.EventType = alarmDef.EventType
// GenerateThresholdAlarm raises an internal alarm for specific problem sp.
// The alarm is created through a.alarmClient with severity SeverityWarning,
// the fixed string "threshold" and data as the remaining NewAlarm arguments,
// wrapped in a raise message stamped with the current time in nanoseconds, and
// appended to both the active list and the history with an empty definition.
// NOTE(review): the returned bool is on a line not visible in this excerpt;
// UpdateAlarmLists stores it in the matching "exceeded" flag.
// NOTE(review): a.alarmClient is assigned in Run; if that initialization
// failed the call below may operate on a nil client - confirm.
181 func (a *AlarmManager) GenerateThresholdAlarm(sp int, data string) bool {
182 thresholdAlarm := a.alarmClient.NewAlarm(sp, alarm.SeverityWarning, "threshold", data)
183 thresholdMessage := alarm.AlarmMessage{
184 Alarm: thresholdAlarm,
185 AlarmAction: alarm.AlarmActionRaise,
186 AlarmTime: (time.Now().UnixNano()),
// The threshold alarm is stored directly, without going through ProcessAlarm.
188 a.activeAlarms = append(a.activeAlarms, AlarmNotification{thresholdMessage, alarm.AlarmDefinition{}})
189 a.alarmHistory = append(a.alarmHistory, AlarmNotification{thresholdMessage, alarm.AlarmDefinition{}})
194 func (a *AlarmManager) UpdateAlarmLists(newAlarm *AlarmNotification) {
195 /* If maximum number of active alarms is reached, an error log writing is made, and new alarm indicating the problem is raised.
196 The attempt to raise the alarm next time will be supressed when found as duplicate. */
197 if (len(a.activeAlarms) >= a.maxActiveAlarms) && (a.exceededActiveAlarmOn == false) {
198 app.Logger.Warn("active alarm count exceeded maxActiveAlarms threshold")
199 a.exceededActiveAlarmOn = a.GenerateThresholdAlarm(alarm.ACTIVE_ALARM_EXCEED_MAX_THRESHOLD, "active")
202 if (len(a.alarmHistory) >= a.maxAlarmHistory) && (a.exceededAlarmHistoryOn == false) {
203 app.Logger.Warn("alarm history count exceeded maxAlarmHistory threshold")
204 a.exceededAlarmHistoryOn = a.GenerateThresholdAlarm(alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD, "history")
207 // @todo: For now just keep the alarms (both active and history) in-memory. Use SDL later for persistence
208 a.activeAlarms = append(a.activeAlarms, *newAlarm)
209 a.alarmHistory = append(a.alarmHistory, *newAlarm)
212 func (a *AlarmManager) PostAlarm(m *AlarmNotification) (*alert.PostAlertsOK, error) {
213 result, err := json.Marshal(m)
215 app.Logger.Info("json.Marshal failed: %v", err)
219 fullUrl := fmt.Sprintf("%s/%s", app.Config.GetString("controls.noma.host"), app.Config.GetString("controls.noma.alarmUrl"))
220 app.Logger.Info("Posting alarm to '%s'", fullUrl)
222 resp, err := http.Post(fullUrl, "application/json", bytes.NewReader(result))
223 if err != nil || resp == nil {
224 app.Logger.Info("Unable to post alarm to '%s': %v", fullUrl, err)
230 func (a *AlarmManager) GenerateAlertLabels(newAlarm alarm.Alarm, status AlertStatus, alarmTime int64) (models.LabelSet, models.LabelSet) {
231 alarmDef := alarm.RICAlarmDefinitions[newAlarm.SpecificProblem]
232 amLabels := models.LabelSet{
233 "status": string(status),
234 "alertname": alarmDef.AlarmText,
235 "severity": string(newAlarm.PerceivedSeverity),
236 "service": fmt.Sprintf("%s/%s", newAlarm.ManagedObjectId, newAlarm.ApplicationId),
237 "system_name": "RIC",
239 amAnnotations := models.LabelSet{
240 "alarm_id": fmt.Sprintf("%d", alarmDef.AlarmId),
241 "specific_problem": fmt.Sprintf("%d", newAlarm.SpecificProblem),
242 "event_type": alarmDef.EventType,
243 "identifying_info": newAlarm.IdentifyingInfo,
244 "additional_info": newAlarm.AdditionalInfo,
245 "description": fmt.Sprintf("%s:%s", newAlarm.IdentifyingInfo, newAlarm.AdditionalInfo),
246 "instructions": alarmDef.OperationInstructions,
247 "timestamp": fmt.Sprintf("%s", time.Unix(0, alarmTime).Format("02/01/2006, 15:04:05")),
250 return amLabels, amAnnotations
253 func (a *AlarmManager) NewAlertmanagerClient() *client.Alertmanager {
254 cr := clientruntime.New(a.amHost, a.amBaseUrl, a.amSchemes)
255 return client.New(cr, strfmt.Default)
// PostAlert posts a single alert to the alert manager. The alert carries
// amAnnotations as its annotations and an empty generator URL, and is sent
// through a client obtained from NewAlertmanagerClient. The labels and
// annotations are logged before the request; a failure is logged at error
// level together with the configured host and base URL.
// NOTE(review): the lines that attach amLabels to the alert, the error check
// and the return statement are not visible in this excerpt.
// NOTE(review): a new client is constructed on every call.
258 func (a *AlarmManager) PostAlert(amLabels, amAnnotations models.LabelSet) (*alert.PostAlertsOK, error) {
259 pa := &models.PostableAlert{
261 GeneratorURL: strfmt.URI(""),
264 Annotations: amAnnotations,
266 alertParams := alert.NewPostAlertsParams().WithAlerts(models.PostableAlerts{pa})
268 app.Logger.Info("Posting alerts: labels: %+v, annotations: %+v", amLabels, amAnnotations)
269 ok, err := a.NewAlertmanagerClient().Alert.PostAlerts(alertParams)
271 app.Logger.Error("Posting alerts to '%s/%s' failed with error: %v", a.amHost, a.amBaseUrl, err)
// StatusCB is the status callback that Run registers with app.Resource.
// NOTE(review): only the "RMR not ready yet!" log is visible in this excerpt;
// the condition guarding it and the returned values are not. Presumably the
// result reflects a.rmrReady, which Run sets from the ready callback - confirm.
276 func (a *AlarmManager) StatusCB() bool {
278 app.Logger.Info("RMR not ready yet!")
// ConfigChangeCB is the configuration change listener that Run registers with
// the framework. It re-reads the maximum number of active alarms and the
// maximum history size through app.Config, and the alert interval and alert
// manager address through viper, then logs the new values at debug level.
// configparam is not used by any line visible in this excerpt.
// NOTE(review): StartAlertTimer builds its tick channel from a.alertInterval
// when it starts, so a changed interval presumably does not affect an already
// running timer - confirm.
284 func (a *AlarmManager) ConfigChangeCB(configparam string) {
286 a.maxActiveAlarms = app.Config.GetInt("controls.maxActiveAlarms")
287 a.maxAlarmHistory = app.Config.GetInt("controls.maxAlarmHistory")
288 a.alertInterval = viper.GetInt("controls.promAlertManager.alertInterval")
289 a.amHost = viper.GetString("controls.promAlertManager.address")
291 app.Logger.Debug("ConfigChangeCB: maxActiveAlarms %v", a.maxActiveAlarms)
292 app.Logger.Debug("ConfigChangeCB: maxAlarmHistory = %v", a.maxAlarmHistory)
293 app.Logger.Debug("ConfigChangeCB: alertInterval %v", a.alertInterval)
294 app.Logger.Debug("ConfigChangeCB: amHost = %v", a.amHost)
// ReadAlarmDefinitionFromJson loads alarm definitions from the JSON file named
// by the DEF_FILE environment variable and registers them in
// alarm.RICAlarmDefinitions, keyed by alarm id. For every definition a new
// alarm.AlarmDefinition is allocated and filled with the id, text, event type
// and operation instructions read from the file.
// Failures to read or decode the file are logged at error level; nothing is
// returned to the caller.
// NOTE(review): the branch structure is not visible in this excerpt, so
// whether a definition whose id is already registered is skipped or
// overwritten must be confirmed against the full file.
299 func (a *AlarmManager) ReadAlarmDefinitionFromJson() {
301 filename := os.Getenv("DEF_FILE")
302 file, err := ioutil.ReadFile(filename)
304 data := RicAlarmDefinitions{}
305 err = json.Unmarshal([]byte(file), &data)
307 for _, alarmDefinition := range data.AlarmDefinitions {
308 _, exists := alarm.RICAlarmDefinitions[alarmDefinition.AlarmId]
310 app.Logger.Error("ReadAlarmDefinitionFromJson: alarm definition already exists for %v", alarmDefinition.AlarmId)
312 app.Logger.Debug("ReadAlarmDefinitionFromJson: alarm %v", alarmDefinition.AlarmId)
// The map stores pointers, so each definition gets its own allocation.
313 ricAlarmDefintion := new(alarm.AlarmDefinition)
314 ricAlarmDefintion.AlarmId = alarmDefinition.AlarmId
315 ricAlarmDefintion.AlarmText = alarmDefinition.AlarmText
316 ricAlarmDefintion.EventType = alarmDefinition.EventType
317 ricAlarmDefintion.OperationInstructions = alarmDefinition.OperationInstructions
318 alarm.RICAlarmDefinitions[alarmDefinition.AlarmId] = ricAlarmDefintion
322 app.Logger.Error("ReadAlarmDefinitionFromJson: json.Unmarshal failed with error %v", err)
325 app.Logger.Error("ReadAlarmDefinitionFromJson: ioutil.ReadFile failed with error %v", err)
329 func (a *AlarmManager) Run(sdlcheck bool) {
330 app.Logger.SetMdc("alarmManager", fmt.Sprintf("%s:%s", Version, Hash))
331 app.SetReadyCB(func(d interface{}) { a.rmrReady = true }, true)
332 app.Resource.InjectStatusCb(a.StatusCB)
333 app.AddConfigChangeListener(a.ConfigChangeCB)
335 alarm.RICAlarmDefinitions = make(map[int]*alarm.AlarmDefinition)
336 a.ReadAlarmDefinitionFromJson()
338 app.Resource.InjectRoute("/ric/v1/alarms", a.RaiseAlarm, "POST")
339 app.Resource.InjectRoute("/ric/v1/alarms", a.ClearAlarm, "DELETE")
340 app.Resource.InjectRoute("/ric/v1/alarms/active", a.GetActiveAlarms, "GET")
341 app.Resource.InjectRoute("/ric/v1/alarms/history", a.GetAlarmHistory, "GET")
342 app.Resource.InjectRoute("/ric/v1/alarms/config", a.SetAlarmConfig, "POST")
343 app.Resource.InjectRoute("/ric/v1/alarms/config", a.GetAlarmConfig, "GET")
344 app.Resource.InjectRoute("/ric/v1/alarms/define", a.SetAlarmDefinition, "POST")
345 app.Resource.InjectRoute("/ric/v1/alarms/define/{alarmId}", a.DeleteAlarmDefinition, "DELETE")
346 app.Resource.InjectRoute("/ric/v1/alarms/define", a.GetAlarmDefinition, "GET")
347 app.Resource.InjectRoute("/ric/v1/alarms/define/{alarmId}", a.GetAlarmDefinition, "GET")
349 // Start background timer for re-raising alerts
350 go a.StartAlertTimer()
351 a.alarmClient, _ = alarm.InitAlarm("SEP", "ALARMMANAGER")
353 app.RunWithParams(a, sdlcheck)
// NewAlarmManager builds an AlarmManager with empty active and history lists
// and both "exceeded" threshold flags cleared.
//
//   - amHost: alert manager address; it can be replaced by the
//     "controls.promAlertManager.address" setting (the condition is not
//     visible in this excerpt - presumably when amHost is empty).
//   - alertInterval: period used by StartAlertTimer, in milliseconds; 0 selects
//     the "controls.promAlertManager.alertInterval" setting.
//   - clearAlarm: stored as postClear.
//
// The alert manager base URL and scheme and the list limits are read from
// app.Config.
// NOTE(review): some struct fields are initialized on lines not visible in
// this excerpt.
356 func NewAlarmManager(amHost string, alertInterval int, clearAlarm bool) *AlarmManager {
357 if alertInterval == 0 {
358 alertInterval = viper.GetInt("controls.promAlertManager.alertInterval")
362 amHost = viper.GetString("controls.promAlertManager.address")
365 return &AlarmManager{
367 postClear: clearAlarm,
369 amBaseUrl: app.Config.GetString("controls.promAlertManager.baseUrl"),
// A single configured scheme is wrapped into the slice the client runtime takes.
370 amSchemes: []string{app.Config.GetString("controls.promAlertManager.schemes")},
371 alertInterval: alertInterval,
372 activeAlarms: make([]AlarmNotification, 0),
373 alarmHistory: make([]AlarmNotification, 0),
375 maxActiveAlarms: app.Config.GetInt("controls.maxActiveAlarms"),
376 maxAlarmHistory: app.Config.GetInt("controls.maxAlarmHistory"),
377 exceededActiveAlarmOn: false,
378 exceededAlarmHistoryOn: false,
// Create the alarm manager with an empty host and a zero alert interval (the
// zero interval makes NewAlarmManager read it from the configuration), with
// clearAlarm set to true, and run it with sdlcheck set to true.
384 NewAlarmManager("", 0, true).Run(true)