2 * Copyright (c) 2020 AT&T Intellectual Property.
3 * Copyright (c) 2020 Nokia.
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
17 * This source code is part of the near-RT RIC (RAN Intelligent Controller)
18 * platform project (RICP).
26 "gerrit.o-ran-sc.org/r/ric-plt/alarm-go/alarm"
27 app "gerrit.o-ran-sc.org/r/ric-plt/xapp-frame/pkg/xapp"
28 clientruntime "github.com/go-openapi/runtime/client"
29 "github.com/go-openapi/strfmt"
30 "github.com/prometheus/alertmanager/api/v2/client"
31 "github.com/prometheus/alertmanager/api/v2/client/alert"
32 "github.com/prometheus/alertmanager/api/v2/models"
33 "github.com/spf13/viper"
// StartAlertTimer creates a tick channel that fires every a.alertInterval
// milliseconds and re-posts the alarms held in a.activeAlarms to Alertmanager
// with AlertStatusActive, logging each alarm it re-raises.
//
// NOTE(review): time.Tick offers no way to stop the underlying ticker, so this
// is presumably meant to run for the lifetime of the process (Run starts it in
// its own goroutine) - confirm.
// NOTE(review): the values returned by PostAlert are discarded here; a failed
// re-post is only visible through whatever PostAlert logs itself.
39 func (a *AlarmManager) StartAlertTimer() {
// a.alertInterval is interpreted as a number of milliseconds.
40 tick := time.Tick(time.Duration(a.alertInterval) * time.Millisecond)
// Re-raise every alarm that is currently in the active list.
43 for _, m := range a.activeAlarms {
44 app.Logger.Info("Re-raising alarm: %v", m)
45 a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive, m.AlarmTime))
// Consume is invoked with a received RMR message. It logs the reception,
// schedules the message buffer to be freed when the function returns, and
// dispatches on the message type: alarm.RIC_ALARM_UPDATE has a dedicated case,
// any other type is logged and discarded.
//
// NOTE(review): presumably the RIC_ALARM_UPDATE case hands the message to
// HandleAlarms and the function returns nil - confirm against the full body.
51 func (a *AlarmManager) Consume(rp *app.RMRParams) (err error) {
52 app.Logger.Info("Message received!")
// Release the RMR buffer on every exit path.
54 defer app.Rmr.Free(rp.Mbuf)
56 case alarm.RIC_ALARM_UPDATE:
// Unrecognised message types are only logged.
59 app.Logger.Info("Unknown Message Type '%d', discarding", rp.Mtype)
// HandleAlarms decodes the JSON payload of an RMR message into an
// alarm.AlarmMessage and passes it to ProcessAlarm, returning ProcessAlarm's
// result. A payload that cannot be decoded is logged as an error.
//
// Note that the raw payload and the decoded alarm are both written to the log
// at Info level.
65 func (a *AlarmManager) HandleAlarms(rp *app.RMRParams) (*alert.PostAlertsOK, error) {
66 var m alarm.AlarmMessage
67 app.Logger.Info("Received JSON: %s", rp.Payload)
68 if err := json.Unmarshal(rp.Payload, &m); err != nil {
69 app.Logger.Error("json.Unmarshal failed: %v", err)
72 app.Logger.Info("newAlarm: %v", m)
74 return a.ProcessAlarm(&m)
// ProcessAlarm applies a raise or clear request to the in-memory alarm lists
// (a.activeAlarms and a.alarmHistory) and posts the resulting alert to
// Alertmanager through PostAlert.
//
// Steps in order:
//   - an alarm whose SpecificProblem has no entry in alarm.RICAlarmDefinitions
//     is logged with a warning and suppressed;
//   - a raise that matches an already active alarm (see IsMatchFound) is
//     treated as a duplicate: the same-severity and different-severity cases
//     are handled separately, the latter removing the old entry from the
//     active list;
//   - a clear appends the message to the history, removes the matching active
//     entry, raises the history-threshold alarm if the history has reached
//     a.maxAlarmHistory, resets the "exceeded" flags when the cleared alarm is
//     itself one of the threshold alarms, and posts the alert as
//     AlertStatusResolved;
//   - a raise updates both lists through UpdateAlarmLists and posts the alert
//     as AlertStatusActive.
77 func (a *AlarmManager) ProcessAlarm(m *alarm.AlarmMessage) (*alert.PostAlertsOK, error) {
// Only alarms with a registered definition are processed.
79 if _, ok := alarm.RICAlarmDefinitions[m.Alarm.SpecificProblem]; !ok {
80 app.Logger.Warn("Alarm (SP='%d') not recognized, suppressing ...", m.Alarm.SpecificProblem)
85 // Suppress duplicate alarms
86 idx, found := a.IsMatchFound(m.Alarm)
87 if found && m.AlarmAction == alarm.AlarmActionRaise {
// This message is logged before the severity comparison, i.e. also when the
// severity differs and the old entry is replaced rather than suppressed.
88 app.Logger.Info("Duplicate alarm found, suppressing ...")
89 if m.PerceivedSeverity == a.activeAlarms[idx].PerceivedSeverity {
90 // Duplicate with same severity found
94 // Remove duplicate with different severity
95 a.activeAlarms = a.RemoveAlarm(a.activeAlarms, idx, "active")
99 // Clear alarm if found from active alarm list
100 if m.AlarmAction == alarm.AlarmActionClear {
// The clear message itself is recorded in the history before the active
// entry is dropped.
102 a.alarmHistory = append(a.alarmHistory, *m)
103 a.activeAlarms = a.RemoveAlarm(a.activeAlarms, idx, "active")
// Raise the history-threshold alarm once, when the history has reached its
// configured maximum and the alarm is not already on.
104 if ((len(a.alarmHistory) >= a.maxAlarmHistory) && (a.exceededAlarmHistoryOn == false)){
105 app.Logger.Error("alarm history count exceeded maxAlarmHistory threshold")
106 histAlarm := a.alarmClient.NewAlarm(alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD, alarm.SeverityWarning, "threshold", "history")
107 histAlarmMessage := alarm.AlarmMessage{Alarm: histAlarm, AlarmAction: alarm.AlarmActionRaise, AlarmTime: (time.Now().UnixNano())}
108 a.activeAlarms = append(a.activeAlarms, histAlarmMessage)
109 a.alarmHistory = append(a.alarmHistory, histAlarmMessage)
// Clearing one of the threshold alarms re-arms the corresponding flag so the
// threshold alarm can be raised again later.
111 if ((a.exceededActiveAlarmOn == true) && (m.Alarm.SpecificProblem == alarm.ACTIVE_ALARM_EXCEED_MAX_THRESHOLD)) {
112 a.exceededActiveAlarmOn = false
114 if ((a.exceededAlarmHistoryOn == true) && (m.Alarm.SpecificProblem == alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD)) {
115 a.exceededAlarmHistoryOn = false
119 return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusResolved, m.AlarmTime))
122 app.Logger.Info("No matching active alarm found, suppressing ...")
127 // New alarm -> update active alarms and post to Alert Manager
128 if m.AlarmAction == alarm.AlarmActionRaise {
129 a.UpdateAlarmLists(m)
131 return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive, m.AlarmTime))
// IsMatchFound searches a.activeAlarms for an entry that identifies the same
// alarm as newAlarm. Two alarms are considered the same when ManagedObjectId,
// ApplicationId, SpecificProblem and IdentifyingInfo are all equal; severity
// and additional info are not part of the comparison.
//
// NOTE(review): presumably returns the index of the first match together with
// true, and false when nothing matches - confirm the index value used for the
// "not found" case before relying on it.
138 func (a *AlarmManager) IsMatchFound(newAlarm alarm.Alarm) (int, bool) {
139 for i, m := range a.activeAlarms {
140 if m.ManagedObjectId == newAlarm.ManagedObjectId && m.ApplicationId == newAlarm.ApplicationId &&
141 m.SpecificProblem == newAlarm.SpecificProblem && m.IdentifyingInfo == newAlarm.IdentifyingInfo {
148 func (a *AlarmManager) RemoveAlarm(alarms []alarm.AlarmMessage, i int, listName string) []alarm.AlarmMessage {
149 app.Logger.Info("Alarm '%+v' deleted from the '%s' list", alarms[i], listName)
150 copy(alarms[i:], alarms[i+1:])
151 return alarms[:len(alarms)-1]
154 func (a *AlarmManager) UpdateAlarmLists(newAlarm *alarm.AlarmMessage) {
155 /* If maximum number of active alarms is reached, an error log writing is made, and new alarm indicating the problem is raised.
156 The attempt to raise the alarm next time will be supressed when found as duplicate. */
157 if ((len(a.activeAlarms) >= a.maxActiveAlarms) && (a.exceededActiveAlarmOn == false)) {
158 app.Logger.Error("active alarm count exceeded maxActiveAlarms threshold")
159 actAlarm := a.alarmClient.NewAlarm(alarm.ACTIVE_ALARM_EXCEED_MAX_THRESHOLD, alarm.SeverityWarning, "threshold", "active")
160 actAlarmMessage := alarm.AlarmMessage{Alarm: actAlarm, AlarmAction: alarm.AlarmActionRaise, AlarmTime: (time.Now().UnixNano())}
161 a.activeAlarms = append(a.activeAlarms, actAlarmMessage)
162 a.alarmHistory = append(a.alarmHistory, actAlarmMessage)
163 a.exceededActiveAlarmOn = true
166 if ((len(a.alarmHistory) >= a.maxAlarmHistory) && (a.exceededAlarmHistoryOn == false)) {
167 app.Logger.Error("alarm history count exceeded maxAlarmHistory threshold")
168 histAlarm := a.alarmClient.NewAlarm(alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD, alarm.SeverityWarning, "threshold", "history")
169 histAlarmMessage := alarm.AlarmMessage{Alarm: histAlarm, AlarmAction: alarm.AlarmActionRaise, AlarmTime: (time.Now().UnixNano())}
170 a.activeAlarms = append(a.activeAlarms, histAlarmMessage)
171 a.alarmHistory = append(a.alarmHistory, histAlarmMessage)
172 a.exceededAlarmHistoryOn = true
175 // @todo: For now just keep the alarms (both active and history) in-memory. Use SDL later for persistence
176 a.activeAlarms = append(a.activeAlarms, *newAlarm)
177 a.alarmHistory = append(a.alarmHistory, *newAlarm)
180 func (a *AlarmManager) GenerateAlertLabels(newAlarm alarm.Alarm, status AlertStatus, alarmTime int64) (models.LabelSet, models.LabelSet) {
181 alarmDef := alarm.RICAlarmDefinitions[newAlarm.SpecificProblem]
182 amLabels := models.LabelSet{
183 "status": string(status),
184 "alertname": alarmDef.AlarmText,
185 "severity": string(newAlarm.PerceivedSeverity),
186 "service": fmt.Sprintf("%s:%s", newAlarm.ManagedObjectId, newAlarm.ApplicationId),
187 "system_name": fmt.Sprintf("RIC:%s:%s", newAlarm.ManagedObjectId, newAlarm.ApplicationId),
189 amAnnotations := models.LabelSet{
190 "alarm_id": fmt.Sprintf("%d", alarmDef.AlarmId),
191 "description": fmt.Sprintf("%d:%s:%s", newAlarm.SpecificProblem, newAlarm.IdentifyingInfo, newAlarm.AdditionalInfo),
192 "additional_info": newAlarm.AdditionalInfo,
193 "summary": alarmDef.EventType,
194 "instructions": alarmDef.OperationInstructions,
195 "timestamp": fmt.Sprintf("%s", time.Unix(0, alarmTime).Format("02/01/2006, 15:04:05")),
198 return amLabels, amAnnotations
201 func (a *AlarmManager) NewAlertmanagerClient() *client.Alertmanager {
202 cr := clientruntime.New(a.amHost, a.amBaseUrl, a.amSchemes)
203 return client.New(cr, strfmt.Default)
// PostAlert sends a single alert, described by amLabels and amAnnotations, to
// Alertmanager through the v2 PostAlerts API and logs the labels and
// annotations at Info level before the request is made.
//
// A new Alertmanager client is created for every call (see
// NewAlertmanagerClient). A failed request is logged together with the
// configured host and base URL.
//
// NOTE(review): presumably the response and error from PostAlerts are returned
// unchanged to the caller - confirm.
206 func (a *AlarmManager) PostAlert(amLabels, amAnnotations models.LabelSet) (*alert.PostAlertsOK, error) {
207 pa := &models.PostableAlert{
// No generator URL is supplied for alerts produced by the alarm manager.
209 GeneratorURL: strfmt.URI(""),
212 Annotations: amAnnotations,
// The API takes a list of alerts; exactly one is posted per call.
214 alertParams := alert.NewPostAlertsParams().WithAlerts(models.PostableAlerts{pa})
216 app.Logger.Info("Posting alerts: labels: %+v, annotations: %+v", amLabels, amAnnotations)
217 ok, err := a.NewAlertmanagerClient().Alert.PostAlerts(alertParams)
219 app.Logger.Error("Posting alerts to '%s/%s' failed with error: %v", a.amHost, a.amBaseUrl, err)
// StatusCB is the status callback that Run registers through
// app.Resource.InjectStatusCb. It can log "RMR not ready yet!" and reports a
// boolean status.
//
// NOTE(review): presumably the message is logged only while a.rmrReady is
// false and a.rmrReady is the value returned - confirm against the full body.
224 func (a *AlarmManager) StatusCB() bool {
226 app.Logger.Info("RMR not ready yet!")
// ConfigChangeCB is the configuration-change listener that Run registers
// through app.AddConfigChangeListener. It re-reads the alarm limits, the alert
// re-raise interval and the Alertmanager address from configuration, stores
// them on the manager and logs the new values at Debug level.
//
// configparam is not used by any of the statements below.
232 func (a *AlarmManager) ConfigChangeCB(configparam string) {
// The two limits are read through app.Config, the Alertmanager settings
// directly through viper.
234 a.maxActiveAlarms = app.Config.GetInt("controls.maxActiveAlarms")
235 a.maxAlarmHistory = app.Config.GetInt("controls.maxAlarmHistory")
236 a.alertInterval = viper.GetInt("controls.promAlertManager.alertInterval")
237 a.amHost = viper.GetString("controls.promAlertManager.address")
239 app.Logger.Debug("ConfigChangeCB: maxActiveAlarms %v", a.maxActiveAlarms)
240 app.Logger.Debug("ConfigChangeCB: maxAlarmHistory = %v", a.maxAlarmHistory)
241 app.Logger.Debug("ConfigChangeCB: alertInterval %v", a.alertInterval)
242 app.Logger.Debug("ConfigChangeCB: amHost = %v", a.amHost)
// ReadAlarmDefinitionFromJson loads alarm definitions from the JSON file named
// by the DEF_FILE environment variable and registers them in
// alarm.RICAlarmDefinitions, keyed by alarm id.
//
// For every definition in the file the id is looked up in the registry first;
// an "already exists" error is logged for ids that are present, and a new
// alarm.AlarmDefinition is filled in and stored for the others. Failures to
// read the file or to decode its content are logged as errors; the function
// itself returns nothing.
//
// NOTE(review): ioutil.ReadFile is deprecated since Go 1.16 in favour of
// os.ReadFile.
247 func (a *AlarmManager) ReadAlarmDefinitionFromJson() {
249 filename := os.Getenv("DEF_FILE")
250 file, err := ioutil.ReadFile(filename)
252 data := RicAlarmDefinitions{}
// file is already a []byte, so the conversion is a no-op.
253 err = json.Unmarshal([]byte(file), &data)
255 for _, alarmDefinition := range data.AlarmDefinitions {
256 _, exists := alarm.RICAlarmDefinitions[alarmDefinition.AlarmId]
258 app.Logger.Error("ReadAlarmDefinitionFromJson: alarm definition already exists for %v", alarmDefinition.AlarmId)
260 app.Logger.Debug("ReadAlarmDefinitionFromJson: alarm %v", alarmDefinition.AlarmId)
// Copy the fields into a freshly allocated definition before storing it.
261 ricAlarmDefintion := new(alarm.AlarmDefinition)
262 ricAlarmDefintion.AlarmId = alarmDefinition.AlarmId
263 ricAlarmDefintion.AlarmText = alarmDefinition.AlarmText
264 ricAlarmDefintion.EventType = alarmDefinition.EventType
265 ricAlarmDefintion.OperationInstructions = alarmDefinition.OperationInstructions
266 alarm.RICAlarmDefinitions[alarmDefinition.AlarmId] = ricAlarmDefintion
270 app.Logger.Error("ReadAlarmDefinitionFromJson: json.Unmarshal failed with error %v", err)
273 app.Logger.Error("ReadAlarmDefinitionFromJson: ioutil.ReadFile failed with error %v", err)
277 func (a *AlarmManager) Run(sdlcheck bool) {
278 app.Logger.SetMdc("alarmManager", fmt.Sprintf("%s:%s", Version, Hash))
279 app.SetReadyCB(func(d interface{}) { a.rmrReady = true }, true)
280 app.Resource.InjectStatusCb(a.StatusCB)
281 app.AddConfigChangeListener(a.ConfigChangeCB)
283 alarm.RICAlarmDefinitions = make(map[int]*alarm.AlarmDefinition)
284 a.ReadAlarmDefinitionFromJson()
286 app.Resource.InjectRoute("/ric/v1/alarms", a.RaiseAlarm, "POST")
287 app.Resource.InjectRoute("/ric/v1/alarms", a.ClearAlarm, "DELETE")
288 app.Resource.InjectRoute("/ric/v1/alarms/active", a.GetActiveAlarms, "GET")
289 app.Resource.InjectRoute("/ric/v1/alarms/history", a.GetAlarmHistory, "GET")
290 app.Resource.InjectRoute("/ric/v1/alarms/config", a.SetAlarmConfig, "POST")
291 app.Resource.InjectRoute("/ric/v1/alarms/config", a.GetAlarmConfig, "GET")
292 app.Resource.InjectRoute("/ric/v1/alarms/define", a.SetAlarmDefinition, "POST")
293 app.Resource.InjectRoute("/ric/v1/alarms/define/{alarmId}", a.DeleteAlarmDefinition, "DELETE")
294 app.Resource.InjectRoute("/ric/v1/alarms/define", a.GetAlarmDefinition, "GET")
295 app.Resource.InjectRoute("/ric/v1/alarms/define/{alarmId}", a.GetAlarmDefinition, "GET")
297 // Start background timer for re-raising alerts
298 a.postClear = sdlcheck
299 go a.StartAlertTimer()
300 a.alarmClient, _ = alarm.InitAlarm("SEP", "ALARMMANAGER")
302 app.RunWithParams(a, sdlcheck)
// NewAlarmManager creates an AlarmManager with empty active-alarm and history
// lists, both "exceeded" flags cleared, and the alarm limits taken from
// app.Config.
//
// alertInterval is the alert re-raise interval; when it is 0 the value of
// "controls.promAlertManager.alertInterval" is read from configuration
// instead. amHost is the Alertmanager address; it can likewise be replaced by
// "controls.promAlertManager.address".
//
// NOTE(review): presumably the amHost fallback applies only when the argument
// is empty - confirm the guarding condition.
305 func NewAlarmManager(amHost string, alertInterval int) *AlarmManager {
306 if alertInterval == 0 {
307 alertInterval = viper.GetInt("controls.promAlertManager.alertInterval")
311 amHost = viper.GetString("controls.promAlertManager.address")
314 return &AlarmManager{
317 amBaseUrl: viper.GetString("controls.promAlertManager.baseUrl"),
// A single configured scheme is wrapped into the slice the client expects.
318 amSchemes: []string{viper.GetString("controls.promAlertManager.schemes")},
319 alertInterval: alertInterval,
320 activeAlarms: make([]alarm.AlarmMessage, 0),
321 alarmHistory: make([]alarm.AlarmMessage, 0),
322 maxActiveAlarms: app.Config.GetInt("controls.maxActiveAlarms"),
323 maxAlarmHistory: app.Config.GetInt("controls.maxAlarmHistory"),
324 exceededActiveAlarmOn: false,
325 exceededAlarmHistoryOn: false,
// Create the manager with a zero alert interval, which makes NewAlarmManager
// read the interval from configuration, and run it with sdlcheck set to true.
331 NewAlarmManager("", 0).Run(true)