2 * Copyright (c) 2020 AT&T Intellectual Property.
3 * Copyright (c) 2020 Nokia.
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
17 * This source code is part of the near-RT RIC (RAN Intelligent Controller)
18 * platform project (RICP).
27 "gerrit.o-ran-sc.org/r/ric-plt/alarm-go/alarm"
28 app "gerrit.o-ran-sc.org/r/ric-plt/xapp-frame/pkg/xapp"
29 clientruntime "github.com/go-openapi/runtime/client"
30 "github.com/go-openapi/strfmt"
31 "github.com/prometheus/alertmanager/api/v2/client"
32 "github.com/prometheus/alertmanager/api/v2/client/alert"
33 "github.com/prometheus/alertmanager/api/v2/models"
34 "github.com/spf13/viper"
// StartAlertTimer re-posts every alarm in a.activeAlarms to Alert Manager as an
// active alert. It creates a tick channel whose period is a.alertInterval,
// interpreted as milliseconds.
// NOTE(review): the embedded line numbers jump (42 -> 45, 47 -> 53), so the loop
// that consumes tick and any locking around a.activeAlarms are not visible in
// this listing — confirm against the full file before editing.
41 func (a *AlarmManager) StartAlertTimer() {
// a.alertInterval is read once here; time.Tick gives no handle to stop the ticker.
42 tick := time.Tick(time.Duration(a.alertInterval) * time.Millisecond)
45 for _, m := range a.activeAlarms {
46 app.Logger.Info("Re-raising alarm: %v", m)
// Both values returned by PostAlert are discarded on this line; PostAlert logs
// its own failure.
47 a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive, m.AlarmTime))
// Consume handles one received RMR message described by rp.
// The message buffer rp.Mbuf is released through app.Rmr.Free when the function
// returns (deferred), whichever branch is taken.
// NOTE(review): the switch header, the body of the RIC_ALARM_UPDATE case and the
// return statement are not visible in this listing (embedded line numbers 57,
// 59-60 and 62-66 are absent) — confirm against the full file.
53 func (a *AlarmManager) Consume(rp *app.RMRParams) (err error) {
54 app.Logger.Info("Message received!")
56 defer app.Rmr.Free(rp.Mbuf)
58 case alarm.RIC_ALARM_UPDATE:
// Any other message type is only logged.
61 app.Logger.Info("Unknown Message Type '%d', discarding", rp.Mtype)
// HandleAlarms decodes rp.Payload as a JSON alarm.AlarmMessage and passes it to
// ProcessAlarm, wrapped in an AlarmNotification with an empty AlarmDefinition.
// It returns whatever ProcessAlarm returns.
// NOTE(review): the statements that follow the unmarshal error log (embedded
// line numbers 72-73) are not visible here; presumably the error is returned —
// confirm.
67 func (a *AlarmManager) HandleAlarms(rp *app.RMRParams) (*alert.PostAlertsOK, error) {
68 var m alarm.AlarmMessage
// The raw payload is written to the log at Info level before it is parsed.
69 app.Logger.Info("Received JSON: %s", rp.Payload)
70 if err := json.Unmarshal(rp.Payload, &m); err != nil {
71 app.Logger.Error("json.Unmarshal failed: %v", err)
74 app.Logger.Info("newAlarm: %v", m)
// The definition part is left as its zero value on this line.
76 return a.ProcessAlarm(&AlarmNotification{m, alarm.AlarmDefinition{}})
// ProcessAlarm applies one raise/clear notification to the manager's state and
// forwards it to NOMA (PostAlarm) or to Alert Manager (PostAlert).
//
// Behaviour visible in this listing:
//   - a notification whose SpecificProblem has no entry in
//     alarm.RICAlarmDefinitions is logged as not recognized;
//   - a raise that matches an active alarm is logged as a duplicate; when the
//     severities differ, the existing active entry is removed;
//   - a clear copies the alarm into the history, removes it from the active
//     list, persists the state and posts a cleared/resolved notification;
//   - a new raise gets an id, is added to both lists, is persisted and posted.
//
// NOTE(review): this listing omits short lines (the embedded numbers skip 80-81,
// 84-87, 94-96, 99-101, 104, ...), so guards, early returns and any locking are
// not shown — confirm against the full file before changing control flow.
79 func (a *AlarmManager) ProcessAlarm(m *AlarmNotification) (*alert.PostAlertsOK, error) {
82 if _, ok := alarm.RICAlarmDefinitions[m.Alarm.SpecificProblem]; !ok {
83 app.Logger.Warn("Alarm (SP='%d') not recognized, suppressing ...", m.Alarm.SpecificProblem)
88 // Suppress duplicate alarms
// idx is the position of the matching entry in a.activeAlarms as reported by
// IsMatchFound; its value when found is false is not shown in this listing, so
// it must not be used as an index in that case.
89 idx, found := a.IsMatchFound(m.Alarm)
90 if found && m.AlarmAction == alarm.AlarmActionRaise {
91 app.Logger.Info("Duplicate alarm found, suppressing ...")
92 if m.PerceivedSeverity == a.activeAlarms[idx].PerceivedSeverity {
93 // Duplicate with same severity found
97 // Remove duplicate with different severity
98 a.activeAlarms = a.RemoveAlarm(a.activeAlarms, idx, "active")
102 // Clear alarm if found from active alarm list
103 if m.AlarmAction == alarm.AlarmActionClear {
// NOTE(review): a.activeAlarms[idx] is indexed here with no `found` check on the
// visible lines; the guard presumably sits on the omitted line 104 — confirm.
// The clear notification takes over the AlarmId of the active alarm it matches.
105 a.UpdateAlarmFields(a.activeAlarms[idx].AlarmId, m)
// A copy of *m is stored, so later changes to m do not alter this history entry.
106 a.alarmHistory = append(a.alarmHistory, *m)
107 a.activeAlarms = a.RemoveAlarm(a.activeAlarms, idx, "active")
108 if (len(a.alarmHistory) >= a.maxAlarmHistory) && (a.exceededAlarmHistoryOn == false) {
109 app.Logger.Warn("alarm history count exceeded maxAlarmHistory threshold")
// NOTE(review): the bool returned by GenerateThresholdAlarm is discarded here,
// whereas UpdateAlarmLists assigns it to a.exceededAlarmHistoryOn. On the lines
// shown, the flag is therefore not set on this path and the condition above can
// hold again on a later clear — confirm whether that is intended.
110 a.GenerateThresholdAlarm(alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD, "history")
// Clearing one of the two threshold alarms re-arms the corresponding flag.
113 if a.exceededActiveAlarmOn && m.Alarm.SpecificProblem == alarm.ACTIVE_ALARM_EXCEED_MAX_THRESHOLD {
114 a.exceededActiveAlarmOn = false
117 if a.exceededAlarmHistoryOn && m.Alarm.SpecificProblem == alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD {
118 a.exceededAlarmHistoryOn = false
121 a.WriteAlarmInfoToPersistentVolume()
126 // Send alarm notification to NOMA, if enabled
127 if app.Config.GetBool("controls.noma.enabled") {
// This writes through the pointer m, so the caller's notification is changed.
// The history copy appended above was taken before this assignment.
128 m.PerceivedSeverity = alarm.SeverityCleared
129 return a.PostAlarm(m)
131 return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusResolved, m.AlarmTime))
134 app.Logger.Info("No matching active alarm found, suppressing ...")
139 // New alarm -> update active alarms and post to Alert Manager
140 if m.AlarmAction == alarm.AlarmActionRaise {
141 a.UpdateAlarmFields(a.GenerateAlarmId(), m)
142 a.UpdateAlarmLists(m)
143 a.WriteAlarmInfoToPersistentVolume()
146 // Send alarm notification to NOMA, if enabled
147 if app.Config.GetBool("controls.noma.enabled") {
148 return a.PostAlarm(m)
150 return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive, m.AlarmTime))
// IsMatchFound scans a.activeAlarms in order for an entry that refers to the
// same alarm as newAlarm. Two alarms match when ManagedObjectId, ApplicationId,
// SpecificProblem and IdentifyingInfo are all equal; severity and additional
// info are not compared.
// NOTE(review): the return statements are not visible in this listing
// (embedded line numbers 161-165 are absent); presumably the index of the match
// and true are returned, and false when nothing matches — confirm.
157 func (a *AlarmManager) IsMatchFound(newAlarm alarm.Alarm) (int, bool) {
158 for i, m := range a.activeAlarms {
159 if m.ManagedObjectId == newAlarm.ManagedObjectId && m.ApplicationId == newAlarm.ApplicationId &&
160 m.SpecificProblem == newAlarm.SpecificProblem && m.IdentifyingInfo == newAlarm.IdentifyingInfo {
// RemoveAlarm deletes element i from alarms and returns the shortened slice.
// listName is used only in the log message. The receiver is not used on the
// lines shown.
// The removal is done in place: the elements after i are shifted one position
// to the left in the same backing array, so the slice passed in must not be
// used afterwards. i must be a valid index; alarms[i] panics otherwise.
167 func (a *AlarmManager) RemoveAlarm(alarms []AlarmNotification, i int, listName string) []AlarmNotification {
168 app.Logger.Info("Alarm '%+v' deleted from the '%s' list", alarms[i], listName)
169 copy(alarms[i:], alarms[i+1:])
// Drop the now-duplicated last element.
170 return alarms[:len(alarms)-1]
// GenerateAlarmId increments the a.uniqueAlarmId counter and returns the new
// value. The increment is a plain, non-atomic ++ on the field.
173 func (a *AlarmManager) GenerateAlarmId() int {
174 a.uniqueAlarmId++ // @todo: generate a unique ID
175 return a.uniqueAlarmId
// UpdateAlarmFields fills in newAlarm's definition-derived fields: AlarmId is
// set to alarmId, while AlarmText and EventType are copied from the definition
// registered for newAlarm.SpecificProblem.
// NOTE(review): alarmDef is used without a nil check. Run creates
// alarm.RICAlarmDefinitions as map[int]*alarm.AlarmDefinition, so an unknown
// SpecificProblem yields a nil pointer and the field reads below would panic.
// ProcessAlarm tests for the key before its calls; confirm that its omitted
// lines return early for unknown problems, and that other callers do the same.
178 func (a *AlarmManager) UpdateAlarmFields(alarmId int, newAlarm *AlarmNotification) {
179 alarmDef := alarm.RICAlarmDefinitions[newAlarm.SpecificProblem]
180 newAlarm.AlarmId = alarmId
181 newAlarm.AlarmText = alarmDef.AlarmText
182 newAlarm.EventType = alarmDef.EventType
// GenerateThresholdAlarm raises an internal warning alarm for specific problem
// sp and appends it to both a.activeAlarms and a.alarmHistory. data is passed
// to a.alarmClient.NewAlarm as the last argument, after the literal "threshold".
// Nothing is posted to Alert Manager or NOMA on the lines shown.
// NOTE(review): the return statement is not visible in this listing (embedded
// line numbers 197-199 are absent); callers store the bool result in their
// "exceeded" flags — confirm which value is returned.
185 func (a *AlarmManager) GenerateThresholdAlarm(sp int, data string) bool {
186 thresholdAlarm := a.alarmClient.NewAlarm(sp, alarm.SeverityWarning, "threshold", data)
187 thresholdMessage := alarm.AlarmMessage{
188 Alarm: thresholdAlarm,
189 AlarmAction: alarm.AlarmActionRaise,
// Timestamp in nanoseconds since the Unix epoch.
190 AlarmTime: (time.Now().UnixNano()),
// NOTE(review): alarmDef is the pointer stored in the shared definitions map,
// with no nil check; a missing definition for sp would panic below.
192 alarmDef := alarm.RICAlarmDefinitions[sp]
193 alarmId := a.GenerateAlarmId()
// NOTE(review): this assignment writes through the shared pointer, so the
// AlarmId of the registered definition itself is overwritten with the generated
// id, not just the value copied into the notifications below.
// GenerateAlertLabels later reads alarmDef.AlarmId from the same map — confirm
// this side effect is intended.
194 alarmDef.AlarmId = alarmId
// *alarmDef is copied by value into each notification.
195 a.activeAlarms = append(a.activeAlarms, AlarmNotification{thresholdMessage, *alarmDef})
196 a.alarmHistory = append(a.alarmHistory, AlarmNotification{thresholdMessage, *alarmDef})
// UpdateAlarmLists appends a copy of newAlarm to both the active-alarm list and
// the alarm history. Before appending, it checks each list against its
// configured maximum and, if the limit is reached and the matching "exceeded"
// flag is not yet set, raises the corresponding threshold alarm and stores
// GenerateThresholdAlarm's result in that flag.
// Reaching a limit does not stop newAlarm from being appended on the lines
// shown; neither list is trimmed here.
201 func (a *AlarmManager) UpdateAlarmLists(newAlarm *AlarmNotification) {
202 /* If the maximum number of active alarms is reached, a warning is logged and a new alarm indicating the problem is raised.
203 The next attempt to raise that alarm will be suppressed when it is found to be a duplicate. */
204 if (len(a.activeAlarms) >= a.maxActiveAlarms) && (a.exceededActiveAlarmOn == false) {
205 app.Logger.Warn("active alarm count exceeded maxActiveAlarms threshold")
206 a.exceededActiveAlarmOn = a.GenerateThresholdAlarm(alarm.ACTIVE_ALARM_EXCEED_MAX_THRESHOLD, "active")
209 if (len(a.alarmHistory) >= a.maxAlarmHistory) && (a.exceededAlarmHistoryOn == false) {
210 app.Logger.Warn("alarm history count exceeded maxAlarmHistory threshold")
211 a.exceededAlarmHistoryOn = a.GenerateThresholdAlarm(alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD, "history")
214 // @todo: For now just keep the alarms (both active and history) in-memory. Use SDL later for persistence
215 a.activeAlarms = append(a.activeAlarms, *newAlarm)
216 a.alarmHistory = append(a.alarmHistory, *newAlarm)
// PostAlarm serializes m as JSON and sends it with an HTTP POST to the NOMA
// endpoint. The URL is built by joining the configuration values
// "controls.noma.host" and "controls.noma.alarmUrl" with a "/".
// NOTE(review): the return statements are not visible in this listing
// (embedded line numbers 221, 223-225 and 232-236 are absent) — confirm what is
// returned on success and on failure.
219 func (a *AlarmManager) PostAlarm(m *AlarmNotification) (*alert.PostAlertsOK, error) {
220 result, err := json.Marshal(m)
// Failures in this function are logged at Info level, not Error.
222 app.Logger.Info("json.Marshal failed: %v", err)
226 fullUrl := fmt.Sprintf("%s/%s", app.Config.GetString("controls.noma.host"), app.Config.GetString("controls.noma.alarmUrl"))
227 app.Logger.Info("Posting alarm to '%s'", fullUrl)
// NOTE(review): http.Post uses http.DefaultClient, which has no timeout, so a
// stalled endpoint can block this call indefinitely.
// NOTE(review): no resp.Body.Close() appears on the visible lines, and the HTTP
// status code is not inspected. Unless an omitted line closes the body, each
// successful call leaks the response body and its connection — confirm and add
// `defer resp.Body.Close()` after the error check.
229 resp, err := http.Post(fullUrl, "application/json", bytes.NewReader(result))
230 if err != nil || resp == nil {
231 app.Logger.Info("Unable to post alarm to '%s': %v", fullUrl, err)
// GenerateAlertLabels builds the two label sets sent to Alert Manager for
// newAlarm: the identifying labels (first result) and the annotations (second
// result).
//   - status is written to the "status" label as a string.
//   - alarmTime is interpreted as nanoseconds since the Unix epoch.
// Text, event type, instructions and alarm id come from the definition
// registered for newAlarm.SpecificProblem.
// NOTE(review): alarmDef is used without a nil check; an unknown
// SpecificProblem would panic on the first field access.
237 func (a *AlarmManager) GenerateAlertLabels(newAlarm alarm.Alarm, status AlertStatus, alarmTime int64) (models.LabelSet, models.LabelSet) {
238 alarmDef := alarm.RICAlarmDefinitions[newAlarm.SpecificProblem]
239 amLabels := models.LabelSet{
240 "status": string(status),
241 "alertname": alarmDef.AlarmText,
242 "severity": string(newAlarm.PerceivedSeverity),
243 "service": fmt.Sprintf("%s/%s", newAlarm.ManagedObjectId, newAlarm.ApplicationId),
244 "system_name": "RIC",
246 amAnnotations := models.LabelSet{
// The id reported here is the definition's AlarmId, not an id carried by the
// alarm instance (alarm.Alarm is the only alarm value passed in).
247 "alarm_id": fmt.Sprintf("%d", alarmDef.AlarmId),
248 "specific_problem": fmt.Sprintf("%d", newAlarm.SpecificProblem),
249 "event_type": alarmDef.EventType,
250 "identifying_info": newAlarm.IdentifyingInfo,
251 "additional_info": newAlarm.AdditionalInfo,
252 "description": fmt.Sprintf("%s:%s", newAlarm.IdentifyingInfo, newAlarm.AdditionalInfo),
253 "instructions": alarmDef.OperationInstructions,
// Layout is day/month/year, 24-hour clock. time.Unix yields local time, so the
// text depends on the process's time zone. The outer Sprintf("%s", ...) is
// redundant because Format already returns a string.
254 "timestamp": fmt.Sprintf("%s", time.Unix(0, alarmTime).Format("02/01/2006, 15:04:05")),
257 return amLabels, amAnnotations
// NewAlertmanagerClient builds an Alert Manager API client from the manager's
// current a.amHost, a.amBaseUrl and a.amSchemes, using the default strfmt
// format registry. A new transport and client are created on every call;
// nothing is cached on the manager.
260 func (a *AlarmManager) NewAlertmanagerClient() *client.Alertmanager {
261 cr := clientruntime.New(a.amHost, a.amBaseUrl, a.amSchemes)
262 return client.New(cr, strfmt.Default)
// PostAlert sends a single alert, described by amLabels and amAnnotations, to
// Alert Manager using a client freshly created by NewAlertmanagerClient.
// A failure is logged at Error level together with the host and base URL.
// NOTE(review): the part of the PostableAlert literal that carries amLabels,
// the error check and the return statement are not visible in this listing
// (embedded line numbers 267, 269-270, 272, 277 and 279-281 are absent) —
// confirm against the full file.
265 func (a *AlarmManager) PostAlert(amLabels, amAnnotations models.LabelSet) (*alert.PostAlertsOK, error) {
266 pa := &models.PostableAlert{
// The generator URL is always sent empty.
268 GeneratorURL: strfmt.URI(""),
271 Annotations: amAnnotations,
// The request carries exactly one alert.
273 alertParams := alert.NewPostAlertsParams().WithAlerts(models.PostableAlerts{pa})
275 app.Logger.Info("Posting alerts: labels: %+v, annotations: %+v", amLabels, amAnnotations)
276 ok, err := a.NewAlertmanagerClient().Alert.PostAlerts(alertParams)
278 app.Logger.Error("Posting alerts to '%s/%s' failed with error: %v", a.amHost, a.amBaseUrl, err)
// StatusCB is the status callback that Run registers through
// app.Resource.InjectStatusCb.
// NOTE(review): only the signature and one log line are visible in this listing
// (embedded line numbers 284 and 286-289 are absent). Presumably it reports
// false and logs while a.rmrReady is unset, and true afterwards — confirm
// against the full file.
283 func (a *AlarmManager) StatusCB() bool {
285 app.Logger.Info("RMR not ready yet!")
// ConfigChangeCB is the configuration-change listener that Run registers via
// app.AddConfigChangeListener. It re-reads the alarm limits, the alert interval
// and the Alert Manager address into the manager and logs the new values at
// Debug level. configparam is not used on the lines shown.
// NOTE(review): the two limits are read through app.Config while the interval
// and address are read through the global viper instance — confirm both refer
// to the same configuration source.
// NOTE(review): StartAlertTimer reads a.alertInterval once when it creates its
// tick channel, so on the lines visible there a new interval does not change an
// already running timer — confirm.
291 func (a *AlarmManager) ConfigChangeCB(configparam string) {
293 a.maxActiveAlarms = app.Config.GetInt("controls.maxActiveAlarms")
294 a.maxAlarmHistory = app.Config.GetInt("controls.maxAlarmHistory")
295 a.alertInterval = viper.GetInt("controls.promAlertManager.alertInterval")
296 a.amHost = viper.GetString("controls.promAlertManager.address")
298 app.Logger.Debug("ConfigChangeCB: maxActiveAlarms %v", a.maxActiveAlarms)
299 app.Logger.Debug("ConfigChangeCB: maxAlarmHistory = %v", a.maxAlarmHistory)
300 app.Logger.Debug("ConfigChangeCB: alertInterval %v", a.alertInterval)
301 app.Logger.Debug("ConfigChangeCB: amHost = %v", a.amHost)
// ReadAlarmDefinitionFromJson loads alarm definitions from the JSON file named
// by the DEF_FILE environment variable and registers them in
// alarm.RICAlarmDefinitions, keyed by AlarmId. A read or parse failure is
// logged at Error level; no error is returned to the caller.
// NOTE(review): the if/else lines that connect these statements are not visible
// in this listing (embedded line numbers 310, 313, 316, 318 and 326-328 are
// absent). Presumably an already-registered id is logged and skipped rather
// than overwritten — confirm.
306 func (a *AlarmManager) ReadAlarmDefinitionFromJson() {
// If DEF_FILE is unset, filename is empty and ReadFile fails.
308 filename := os.Getenv("DEF_FILE")
309 file, err := ioutil.ReadFile(filename)
311 data := RicAlarmDefinitions{}
// The []byte conversion is a no-op: ReadFile already returns []byte.
312 err = json.Unmarshal([]byte(file), &data)
314 for _, alarmDefinition := range data.AlarmDefinitions {
315 _, exists := alarm.RICAlarmDefinitions[alarmDefinition.AlarmId]
317 app.Logger.Error("ReadAlarmDefinitionFromJson: alarm definition already exists for %v", alarmDefinition.AlarmId)
319 app.Logger.Debug("ReadAlarmDefinitionFromJson: alarm %v", alarmDefinition.AlarmId)
// A fresh AlarmDefinition is allocated for each entry and its pointer is stored
// in the map.
320 ricAlarmDefintion := new(alarm.AlarmDefinition)
321 ricAlarmDefintion.AlarmId = alarmDefinition.AlarmId
322 ricAlarmDefintion.AlarmText = alarmDefinition.AlarmText
323 ricAlarmDefintion.EventType = alarmDefinition.EventType
324 ricAlarmDefintion.OperationInstructions = alarmDefinition.OperationInstructions
325 alarm.RICAlarmDefinitions[alarmDefinition.AlarmId] = ricAlarmDefintion
329 app.Logger.Error("ReadAlarmDefinitionFromJson: json.Unmarshal failed with error %v", err)
332 app.Logger.Error("ReadAlarmDefinitionFromJson: ioutil.ReadFile failed with error %v", err)
// ReadAlarmInfoFromPersistentVolume restores the id counter, the active alarms
// and the alarm history from the JSON file at a.alarmInfoPvFile. The two lists
// are copied into newly allocated slices. Read and parse failures are logged at
// Error level; no error is returned to the caller.
// NOTE(review): the if/else lines around the error logs are not visible in this
// listing (embedded line numbers 339, 341, 343, 345 and 351-353 are absent).
// Presumably the manager state is only overwritten after a successful read and
// parse — confirm.
336 func (a *AlarmManager) ReadAlarmInfoFromPersistentVolume() {
337 var alarmpersistentinfo AlarmPersistentInfo
338 byteValue, rerr := ioutil.ReadFile(a.alarmInfoPvFile)
// NOTE(review): "ararminfo.json" in this message looks like a typo for
// "alarminfo.json" (the spelling used by the write path) — fix the log text in
// a code change.
340 app.Logger.Error("ararminfo.json file read error %v", rerr)
342 err := json.Unmarshal(byteValue, &alarmpersistentinfo)
344 app.Logger.Error("alarmpersistentinfo json unmarshal error %v", err)
346 a.uniqueAlarmId = alarmpersistentinfo.UniqueAlarmId
347 a.activeAlarms = make([]AlarmNotification, len(alarmpersistentinfo.ActiveAlarms))
348 a.alarmHistory = make([]AlarmNotification, len(alarmpersistentinfo.AlarmHistory))
349 copy(a.activeAlarms, alarmpersistentinfo.ActiveAlarms)
350 copy(a.alarmHistory, alarmpersistentinfo.AlarmHistory)
// WriteAlarmInfoToPersistentVolume snapshots the id counter, the active alarms
// and the alarm history into an AlarmPersistentInfo, encodes it as indented
// JSON and writes it to a.alarmInfoPvFile. Marshal and write failures are
// logged at Error level; no error is returned to the caller.
// NOTE(review): the if/else lines around the error logs are not visible in this
// listing (embedded line numbers 363, 365, 367 and 369-371 are absent).
// Presumably the write is skipped when marshalling fails — confirm.
355 func (a *AlarmManager) WriteAlarmInfoToPersistentVolume() {
356 var alarmpersistentinfo AlarmPersistentInfo
357 alarmpersistentinfo.UniqueAlarmId = a.uniqueAlarmId
// Both lists are copied into new slices before encoding.
358 alarmpersistentinfo.ActiveAlarms = make([]AlarmNotification, len(a.activeAlarms))
359 alarmpersistentinfo.AlarmHistory = make([]AlarmNotification, len(a.alarmHistory))
360 copy(alarmpersistentinfo.ActiveAlarms, a.activeAlarms)
361 copy(alarmpersistentinfo.AlarmHistory, a.alarmHistory)
362 wdata, err := json.MarshalIndent(alarmpersistentinfo, "", " ")
364 app.Logger.Error("alarmpersistentinfo json marshal error %v", err)
// NOTE(review): mode 0777 requests read, write and execute for every user on a
// JSON data file. 0644 or 0600 looks more appropriate — confirm nothing relies
// on the wider mode. WriteFile replaces the file contents directly, with no
// temp-file-and-rename step.
366 werr := ioutil.WriteFile(a.alarmInfoPvFile, wdata, 0777)
368 app.Logger.Error("alarminfo.json file write error %v", werr)
// Run wires the alarm manager into the xapp framework and starts it. In order,
// it sets the logger MDC with Version and Hash, registers the ready, status and
// config-change callbacks, loads the alarm definitions, installs the REST
// routes, starts the alert re-raise goroutine, creates the alarm client,
// restores persisted alarm state, and finally calls app.RunWithParams.
// sdlcheck is passed through to app.RunWithParams unchanged.
373 func (a *AlarmManager) Run(sdlcheck bool) {
374 app.Logger.SetMdc("alarmManager", fmt.Sprintf("%s:%s", Version, Hash))
// The ready callback sets a.rmrReady to true.
375 app.SetReadyCB(func(d interface{}) { a.rmrReady = true }, true)
376 app.Resource.InjectStatusCb(a.StatusCB)
377 app.AddConfigChangeListener(a.ConfigChangeCB)
// The package-level definition map is replaced with an empty one before the
// definitions file is read.
379 alarm.RICAlarmDefinitions = make(map[int]*alarm.AlarmDefinition)
380 a.ReadAlarmDefinitionFromJson()
// REST interface: raise/clear alarms, list active alarms and history, get/set
// configuration, and manage alarm definitions.
382 app.Resource.InjectRoute("/ric/v1/alarms", a.RaiseAlarm, "POST")
383 app.Resource.InjectRoute("/ric/v1/alarms", a.ClearAlarm, "DELETE")
384 app.Resource.InjectRoute("/ric/v1/alarms/active", a.GetActiveAlarms, "GET")
385 app.Resource.InjectRoute("/ric/v1/alarms/history", a.GetAlarmHistory, "GET")
386 app.Resource.InjectRoute("/ric/v1/alarms/config", a.SetAlarmConfig, "POST")
387 app.Resource.InjectRoute("/ric/v1/alarms/config", a.GetAlarmConfig, "GET")
388 app.Resource.InjectRoute("/ric/v1/alarms/define", a.SetAlarmDefinition, "POST")
389 app.Resource.InjectRoute("/ric/v1/alarms/define/{alarmId}", a.DeleteAlarmDefinition, "DELETE")
390 app.Resource.InjectRoute("/ric/v1/alarms/define", a.GetAlarmDefinition, "GET")
391 app.Resource.InjectRoute("/ric/v1/alarms/define/{alarmId}", a.GetAlarmDefinition, "GET")
393 // Start background timer for re-raising alerts
// NOTE(review): the goroutine is started before the persisted state is loaded
// two statements below. The goroutine reads a.activeAlarms and the loader
// assigns it; whether that is synchronized is not visible in this listing —
// confirm.
394 go a.StartAlertTimer()
// NOTE(review): the error from alarm.InitAlarm is discarded. If initialization
// fails, a.alarmClient may be unusable when GenerateThresholdAlarm calls
// a.alarmClient.NewAlarm — confirm and handle the error.
395 a.alarmClient, _ = alarm.InitAlarm("SEP", "ALARMMANAGER")
397 a.ReadAlarmInfoFromPersistentVolume()
399 app.RunWithParams(a, sdlcheck)
// NewAlarmManager constructs an AlarmManager with empty alarm lists and both
// "exceeded" flags cleared.
//   - amHost: Alert Manager address.
//   - alertInterval: re-raise period; 0 means "take it from the configuration
//     key controls.promAlertManager.alertInterval".
//   - clearAlarm: stored in the postClear field.
// The base URL, schemes, list limits and the persistence file path are read
// from app.Config.
// NOTE(review): some lines are not visible in this listing (embedded line
// numbers 405-407, 409-410, 412, 414 and 420 are absent). Presumably amHost
// falls back to the configured address only when it is empty, and the omitted
// struct fields include amHost itself — confirm.
402 func NewAlarmManager(amHost string, alertInterval int, clearAlarm bool) *AlarmManager {
403 if alertInterval == 0 {
404 alertInterval = viper.GetInt("controls.promAlertManager.alertInterval")
408 amHost = viper.GetString("controls.promAlertManager.address")
411 return &AlarmManager{
413 postClear: clearAlarm,
415 amBaseUrl: app.Config.GetString("controls.promAlertManager.baseUrl"),
// The configured schemes string is stored as a single-element slice.
416 amSchemes: []string{app.Config.GetString("controls.promAlertManager.schemes")},
417 alertInterval: alertInterval,
418 activeAlarms: make([]AlarmNotification, 0),
419 alarmHistory: make([]AlarmNotification, 0),
421 maxActiveAlarms: app.Config.GetInt("controls.maxActiveAlarms"),
422 maxAlarmHistory: app.Config.GetInt("controls.maxAlarmHistory"),
423 exceededActiveAlarmOn: false,
424 exceededAlarmHistoryOn: false,
425 alarmInfoPvFile: app.Config.GetString("controls.alarmInfoPvFile"),
// Start-up statement: builds a manager with an empty Alert Manager host and a
// zero alert interval, then runs it with sdlcheck set to true. NewAlarmManager
// replaces a zero interval with the configured value.
// NOTE(review): the enclosing function header is not visible in this listing;
// presumably this is the body of main — confirm.
431 NewAlarmManager("", 0, true).Run(true)