2 * Copyright (c) 2020 AT&T Intellectual Property.
3 * Copyright (c) 2020 Nokia.
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
17 * This source code is part of the near-RT RIC (RAN Intelligent Controller)
18 * platform project (RICP).
26 "gerrit.o-ran-sc.org/r/ric-plt/alarm-go/alarm"
27 app "gerrit.o-ran-sc.org/r/ric-plt/xapp-frame/pkg/xapp"
28 clientruntime "github.com/go-openapi/runtime/client"
29 "github.com/go-openapi/strfmt"
30 "github.com/prometheus/alertmanager/api/v2/client"
31 "github.com/prometheus/alertmanager/api/v2/client/alert"
32 "github.com/prometheus/alertmanager/api/v2/models"
33 "github.com/spf13/viper"
39 func (a *AlarmManager) StartAlertTimer() {
40 tick := time.Tick(time.Duration(a.alertInterval) * time.Millisecond)
43 for _, m := range a.activeAlarms {
44 app.Logger.Info("Re-raising alarm: %v", m)
45 a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive, m.AlarmTime))
51 func (a *AlarmManager) Consume(rp *app.RMRParams) (err error) {
52 app.Logger.Info("Message received!")
54 defer app.Rmr.Free(rp.Mbuf)
56 case alarm.RIC_ALARM_UPDATE:
59 app.Logger.Info("Unknown Message Type '%d', discarding", rp.Mtype)
65 func (a *AlarmManager) HandleAlarms(rp *app.RMRParams) (*alert.PostAlertsOK, error) {
66 var m alarm.AlarmMessage
67 app.Logger.Info("Received JSON: %s", rp.Payload)
68 if err := json.Unmarshal(rp.Payload, &m); err != nil {
69 app.Logger.Error("json.Unmarshal failed: %v", err)
72 app.Logger.Info("newAlarm: %v", m)
74 return a.ProcessAlarm(&m)
77 func (a *AlarmManager) ProcessAlarm(m *alarm.AlarmMessage) (*alert.PostAlertsOK, error) {
78 if _, ok := alarm.RICAlarmDefinitions[m.Alarm.SpecificProblem]; !ok {
79 app.Logger.Warn("Alarm (SP='%d') not recognized, suppressing ...", m.Alarm.SpecificProblem)
83 // Suppress duplicate alarms
84 idx, found := a.IsMatchFound(m.Alarm)
85 if found && m.AlarmAction == alarm.AlarmActionRaise {
86 app.Logger.Info("Duplicate alarm found, suppressing ...")
87 if m.PerceivedSeverity == a.activeAlarms[idx].PerceivedSeverity {
88 // Duplicate with same severity found
91 // Remove duplicate with different severity
92 a.activeAlarms = a.RemoveAlarm(a.activeAlarms, idx, "active")
96 // Clear alarm if found from active alarm list
97 if m.AlarmAction == alarm.AlarmActionClear {
99 a.alarmHistory = append(a.alarmHistory, *m)
100 a.activeAlarms = a.RemoveAlarm(a.activeAlarms, idx, "active")
101 if len(a.alarmHistory) >= a.maxAlarmHistory {
102 app.Logger.Error("alarm history count exceeded maxAlarmHistory threshold")
103 histAlarm := a.alarmClient.NewAlarm(alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD, alarm.SeverityWarning, "threshold", "history")
104 histAlarmMessage := alarm.AlarmMessage{Alarm: histAlarm, AlarmAction: alarm.AlarmActionRaise, AlarmTime: (time.Now().UnixNano())}
105 a.activeAlarms = append(a.activeAlarms, histAlarmMessage)
106 a.alarmHistory = append(a.alarmHistory, histAlarmMessage)
109 return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusResolved, m.AlarmTime))
112 app.Logger.Info("No matching active alarm found, suppressing ...")
116 // New alarm -> update active alarms and post to Alert Manager
117 if m.AlarmAction == alarm.AlarmActionRaise {
118 a.UpdateAlarmLists(m)
119 return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive, m.AlarmTime))
125 func (a *AlarmManager) IsMatchFound(newAlarm alarm.Alarm) (int, bool) {
126 for i, m := range a.activeAlarms {
127 if m.ManagedObjectId == newAlarm.ManagedObjectId && m.ApplicationId == newAlarm.ApplicationId &&
128 m.SpecificProblem == newAlarm.SpecificProblem && m.IdentifyingInfo == newAlarm.IdentifyingInfo {
135 func (a *AlarmManager) RemoveAlarm(alarms []alarm.AlarmMessage, i int, listName string) []alarm.AlarmMessage {
137 defer a.mutex.Unlock()
139 app.Logger.Info("Alarm '%+v' deleted from the '%s' list", alarms[i], listName)
140 copy(alarms[i:], alarms[i+1:])
141 return alarms[:len(alarms)-1]
144 func (a *AlarmManager) UpdateAlarmLists(newAlarm *alarm.AlarmMessage) {
146 defer a.mutex.Unlock()
148 /* If maximum number of active alarms is reached, an error log writing is made, and new alarm indicating the problem is raised.
149 The attempt to raise the alarm next time will be supressed when found as duplicate. */
150 if len(a.activeAlarms) >= a.maxActiveAlarms {
151 app.Logger.Error("active alarm count exceeded maxActiveAlarms threshold")
152 actAlarm := a.alarmClient.NewAlarm(alarm.ACTIVE_ALARM_EXCEED_MAX_THRESHOLD, alarm.SeverityWarning, "threshold", "active")
153 actAlarmMessage := alarm.AlarmMessage{Alarm: actAlarm, AlarmAction: alarm.AlarmActionRaise, AlarmTime: (time.Now().UnixNano())}
154 a.activeAlarms = append(a.activeAlarms, actAlarmMessage)
155 a.alarmHistory = append(a.alarmHistory, actAlarmMessage)
158 if len(a.alarmHistory) >= a.maxAlarmHistory {
159 app.Logger.Error("alarm history count exceeded maxAlarmHistory threshold")
160 histAlarm := a.alarmClient.NewAlarm(alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD, alarm.SeverityWarning, "threshold", "history")
161 histAlarmMessage := alarm.AlarmMessage{Alarm: histAlarm, AlarmAction: alarm.AlarmActionRaise, AlarmTime: (time.Now().UnixNano())}
162 a.activeAlarms = append(a.activeAlarms, histAlarmMessage)
163 a.alarmHistory = append(a.alarmHistory, histAlarmMessage)
166 // @todo: For now just keep the alarms (both active and history) in-memory. Use SDL later for persistence
167 a.activeAlarms = append(a.activeAlarms, *newAlarm)
168 a.alarmHistory = append(a.alarmHistory, *newAlarm)
171 func (a *AlarmManager) GenerateAlertLabels(newAlarm alarm.Alarm, status AlertStatus, alarmTime int64) (models.LabelSet, models.LabelSet) {
172 alarmDef := alarm.RICAlarmDefinitions[newAlarm.SpecificProblem]
173 amLabels := models.LabelSet{
174 "status": string(status),
175 "alertname": alarmDef.AlarmText,
176 "severity": string(newAlarm.PerceivedSeverity),
177 "service": fmt.Sprintf("%s:%s", newAlarm.ManagedObjectId, newAlarm.ApplicationId),
178 "system_name": fmt.Sprintf("RIC:%s:%s", newAlarm.ManagedObjectId, newAlarm.ApplicationId),
180 amAnnotations := models.LabelSet{
181 "alarm_id": fmt.Sprintf("%d", alarmDef.AlarmId),
182 "description": fmt.Sprintf("%d:%s:%s", newAlarm.SpecificProblem, newAlarm.IdentifyingInfo, newAlarm.AdditionalInfo),
183 "additional_info": newAlarm.AdditionalInfo,
184 "summary": alarmDef.EventType,
185 "instructions": alarmDef.OperationInstructions,
186 "timestamp": fmt.Sprintf("%s", time.Unix(0, alarmTime).Format("02/01/2006, 15:04:05")),
189 return amLabels, amAnnotations
192 func (a *AlarmManager) NewAlertmanagerClient() *client.Alertmanager {
193 cr := clientruntime.New(a.amHost, a.amBaseUrl, a.amSchemes)
194 return client.New(cr, strfmt.Default)
197 func (a *AlarmManager) PostAlert(amLabels, amAnnotations models.LabelSet) (*alert.PostAlertsOK, error) {
198 pa := &models.PostableAlert{
200 GeneratorURL: strfmt.URI(""),
203 Annotations: amAnnotations,
205 alertParams := alert.NewPostAlertsParams().WithAlerts(models.PostableAlerts{pa})
207 app.Logger.Info("Posting alerts: labels: %+v, annotations: %+v", amLabels, amAnnotations)
208 ok, err := a.NewAlertmanagerClient().Alert.PostAlerts(alertParams)
210 app.Logger.Error("Posting alerts to '%s/%s' failed with error: %v", a.amHost, a.amBaseUrl, err)
215 func (a *AlarmManager) StatusCB() bool {
217 app.Logger.Info("RMR not ready yet!")
223 func (a *AlarmManager) ConfigChangeCB(configparam string) {
225 a.maxActiveAlarms = app.Config.GetInt("controls.maxActiveAlarms")
226 a.maxAlarmHistory = app.Config.GetInt("controls.maxAlarmHistory")
227 a.alertInterval = viper.GetInt("controls.promAlertManager.alertInterval")
228 a.amHost = viper.GetString("controls.promAlertManager.address")
230 app.Logger.Debug("ConfigChangeCB: maxActiveAlarms %v", a.maxActiveAlarms)
231 app.Logger.Debug("ConfigChangeCB: maxAlarmHistory = %v", a.maxAlarmHistory)
232 app.Logger.Debug("ConfigChangeCB: alertInterval %v", a.alertInterval)
233 app.Logger.Debug("ConfigChangeCB: amHost = %v", a.amHost)
238 func (a *AlarmManager) ReadAlarmDefinitionFromJson() {
240 filename := os.Getenv("DEF_FILE")
241 file, err := ioutil.ReadFile(filename)
243 data := RicAlarmDefinitions{}
244 err = json.Unmarshal([]byte(file), &data)
246 for _, alarmDefinition := range data.AlarmDefinitions {
247 _, exists := alarm.RICAlarmDefinitions[alarmDefinition.AlarmId]
249 app.Logger.Error("ReadAlarmDefinitionFromJson: alarm definition already exists for %v", alarmDefinition.AlarmId)
251 app.Logger.Debug("ReadAlarmDefinitionFromJson: alarm %v", alarmDefinition.AlarmId)
252 ricAlarmDefintion := new(alarm.AlarmDefinition)
253 ricAlarmDefintion.AlarmId = alarmDefinition.AlarmId
254 ricAlarmDefintion.AlarmText = alarmDefinition.AlarmText
255 ricAlarmDefintion.EventType = alarmDefinition.EventType
256 ricAlarmDefintion.OperationInstructions = alarmDefinition.OperationInstructions
257 alarm.RICAlarmDefinitions[alarmDefinition.AlarmId] = ricAlarmDefintion
261 app.Logger.Error("json.Unmarshal failed with error %v", err)
264 app.Logger.Error("ioutil.ReadFile failed with error %v", err)
268 func (a *AlarmManager) Run(sdlcheck bool) {
269 app.Logger.SetMdc("alarmManager", fmt.Sprintf("%s:%s", Version, Hash))
270 app.SetReadyCB(func(d interface{}) { a.rmrReady = true }, true)
271 app.Resource.InjectStatusCb(a.StatusCB)
272 app.AddConfigChangeListener(a.ConfigChangeCB)
274 alarm.RICAlarmDefinitions = make(map[int]*alarm.AlarmDefinition)
275 a.ReadAlarmDefinitionFromJson()
277 app.Resource.InjectRoute("/ric/v1/alarms", a.RaiseAlarm, "POST")
278 app.Resource.InjectRoute("/ric/v1/alarms", a.ClearAlarm, "DELETE")
279 app.Resource.InjectRoute("/ric/v1/alarms/active", a.GetActiveAlarms, "GET")
280 app.Resource.InjectRoute("/ric/v1/alarms/history", a.GetAlarmHistory, "GET")
281 app.Resource.InjectRoute("/ric/v1/alarms/config", a.SetAlarmConfig, "POST")
282 app.Resource.InjectRoute("/ric/v1/alarms/config", a.GetAlarmConfig, "GET")
283 app.Resource.InjectRoute("/ric/v1/alarms/define", a.SetAlarmDefinition, "POST")
284 app.Resource.InjectRoute("/ric/v1/alarms/define/{alarmId}", a.DeleteAlarmDefinition, "DELETE")
285 app.Resource.InjectRoute("/ric/v1/alarms/define", a.GetAlarmDefinition, "GET")
286 app.Resource.InjectRoute("/ric/v1/alarms/define/{alarmId}", a.GetAlarmDefinition, "GET")
288 // Start background timer for re-raising alerts
289 a.postClear = sdlcheck
290 go a.StartAlertTimer()
291 a.alarmClient, _ = alarm.InitAlarm("SEP", "ALARMMANAGER")
293 app.RunWithParams(a, sdlcheck)
296 func NewAlarmManager(amHost string, alertInterval int) *AlarmManager {
297 if alertInterval == 0 {
298 alertInterval = viper.GetInt("controls.promAlertManager.alertInterval")
302 amHost = viper.GetString("controls.promAlertManager.address")
305 return &AlarmManager{
308 amBaseUrl: viper.GetString("controls.promAlertManager.baseUrl"),
309 amSchemes: []string{viper.GetString("controls.promAlertManager.schemes")},
310 alertInterval: alertInterval,
311 activeAlarms: make([]alarm.AlarmMessage, 0),
312 alarmHistory: make([]alarm.AlarmMessage, 0),
313 maxActiveAlarms: app.Config.GetInt("controls.maxActiveAlarms"),
314 maxAlarmHistory: app.Config.GetInt("controls.maxAlarmHistory"),
320 NewAlarmManager("", 0).Run(true)