LN0739_FM_FR12_EXT: added support for deleting the alarm definitions through cli
[ric-plt/alarm-go.git] / manager / cmd / manager.go
1 /*
2  *  Copyright (c) 2020 AT&T Intellectual Property.
3  *  Copyright (c) 2020 Nokia.
4  *
5  *  Licensed under the Apache License, Version 2.0 (the "License");
6  *  you may not use this file except in compliance with the License.
7  *  You may obtain a copy of the License at
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  *  Unless required by applicable law or agreed to in writing, software
12  *  distributed under the License is distributed on an "AS IS" BASIS,
13  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  *  See the License for the specific language governing permissions and
15  *  limitations under the License.
16  *
17  * This source code is part of the near-RT RIC (RAN Intelligent Controller)
18  * platform project (RICP).
19  */
20
21 package main
22
23 import (
24         "encoding/json"
25         "fmt"
26         "time"
27
28         clientruntime "github.com/go-openapi/runtime/client"
29         "github.com/go-openapi/strfmt"
30         "github.com/prometheus/alertmanager/api/v2/client"
31         "github.com/prometheus/alertmanager/api/v2/client/alert"
32         "github.com/prometheus/alertmanager/api/v2/models"
33         "github.com/spf13/viper"
34
35         "gerrit.o-ran-sc.org/r/ric-plt/alarm-go/alarm"
36         app "gerrit.o-ran-sc.org/r/ric-plt/xapp-frame/pkg/xapp"
37 )
38
39 func (a *AlarmManager) StartAlertTimer() {
40         tick := time.Tick(time.Duration(a.alertInterval) * time.Millisecond)
41         for range tick {
42                 a.mutex.Lock()
43                 for _, m := range a.activeAlarms {
44                         app.Logger.Info("Re-raising alarm: %v", m)
45                         a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive, m.AlarmTime))
46                 }
47                 a.mutex.Unlock()
48         }
49 }
50
51 func (a *AlarmManager) Consume(rp *app.RMRParams) (err error) {
52         app.Logger.Info("Message received!")
53
54         defer app.Rmr.Free(rp.Mbuf)
55         switch rp.Mtype {
56         case alarm.RIC_ALARM_UPDATE:
57                 a.HandleAlarms(rp)
58         default:
59                 app.Logger.Info("Unknown Message Type '%d', discarding", rp.Mtype)
60         }
61
62         return nil
63 }
64
65 func (a *AlarmManager) HandleAlarms(rp *app.RMRParams) (*alert.PostAlertsOK, error) {
66         var m alarm.AlarmMessage
67         app.Logger.Info("Received JSON: %s", rp.Payload)
68         if err := json.Unmarshal(rp.Payload, &m); err != nil {
69                 app.Logger.Error("json.Unmarshal failed: %v", err)
70                 return nil, err
71         }
72         app.Logger.Info("newAlarm: %v", m)
73
74         return a.ProcessAlarm(&m)
75 }
76
77 func (a *AlarmManager) ProcessAlarm(m *alarm.AlarmMessage) (*alert.PostAlertsOK, error) {
78         if _, ok := alarm.RICAlarmDefinitions[m.Alarm.SpecificProblem]; !ok {
79                 app.Logger.Warn("Alarm (SP='%d') not recognized, suppressing ...", m.Alarm.SpecificProblem)
80                 return nil, nil
81         }
82
83         // Suppress duplicate alarms
84         idx, found := a.IsMatchFound(m.Alarm)
85         if found && m.AlarmAction == alarm.AlarmActionRaise {
86                 app.Logger.Info("Duplicate alarm found, suppressing ...")
87                 if m.PerceivedSeverity == a.activeAlarms[idx].PerceivedSeverity {
88                         // Duplicate with same severity found
89                         return nil, nil
90                 } else {
91                         // Remove duplicate with different severity
92                         a.activeAlarms = a.RemoveAlarm(a.activeAlarms, idx, "active")
93                 }
94         }
95
96         // Clear alarm if found from active alarm list
97         if m.AlarmAction == alarm.AlarmActionClear {
98                 if found {
99                         a.alarmHistory = append(a.alarmHistory, *m)
100                         a.activeAlarms = a.RemoveAlarm(a.activeAlarms, idx, "active")
101
102                         if a.postClear {
103                                 return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusResolved, m.AlarmTime))
104                         }
105                 }
106                 app.Logger.Info("No matching active alarm found, suppressing ...")
107                 return nil, nil
108         }
109
110         // New alarm -> update active alarms and post to Alert Manager
111         if m.AlarmAction == alarm.AlarmActionRaise {
112                 a.UpdateAlarmLists(m)
113                 return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive, m.AlarmTime))
114         }
115
116         return nil, nil
117 }
118
119 func (a *AlarmManager) IsMatchFound(newAlarm alarm.Alarm) (int, bool) {
120         for i, m := range a.activeAlarms {
121                 if m.ManagedObjectId == newAlarm.ManagedObjectId && m.ApplicationId == newAlarm.ApplicationId &&
122                         m.SpecificProblem == newAlarm.SpecificProblem && m.IdentifyingInfo == newAlarm.IdentifyingInfo {
123                         return i, true
124                 }
125         }
126         return -1, false
127 }
128
129 func (a *AlarmManager) RemoveAlarm(alarms []alarm.AlarmMessage, i int, listName string) []alarm.AlarmMessage {
130         a.mutex.Lock()
131         defer a.mutex.Unlock()
132
133         app.Logger.Info("Alarm '%+v' deleted from the '%s' list", alarms[i], listName)
134         copy(alarms[i:], alarms[i+1:])
135         return alarms[:len(alarms)-1]
136 }
137
138 func (a *AlarmManager) UpdateAlarmLists(newAlarm *alarm.AlarmMessage) {
139         a.mutex.Lock()
140         defer a.mutex.Unlock()
141
142         /* If maximum number of active alarms is reached, an error log writing is made, and new alarm indicating the problem is raised.
143            The attempt to raise the alarm next time will be supressed when found as duplicate. */
144         if len(a.activeAlarms) >= a.maxActiveAlarms {
145                 app.Logger.Error("active alarm count exceeded maxActiveAlarms threshold")
146                 actAlarm := a.alarmClient.NewAlarm(alarm.ACTIVE_ALARM_EXCEED_MAX_THRESHOLD, alarm.SeverityWarning, "clear alarms or raise threshold", "active alarms full")
147                 actAlarmMessage := alarm.AlarmMessage{Alarm: actAlarm, AlarmAction: alarm.AlarmActionRaise, AlarmTime: (time.Now().UnixNano())}
148                 a.activeAlarms = append(a.activeAlarms, actAlarmMessage)
149                 a.alarmHistory = append(a.alarmHistory, actAlarmMessage)
150         }
151
152         if len(a.alarmHistory) >= a.maxAlarmHistory {
153                 app.Logger.Error("alarm history count exceeded maxAlarmHistory threshold")
154                 histAlarm := a.alarmClient.NewAlarm(alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD, alarm.SeverityWarning, "clear alarms or raise threshold", "alarm history full")
155                 histAlarmMessage := alarm.AlarmMessage{Alarm: histAlarm, AlarmAction: alarm.AlarmActionRaise, AlarmTime: (time.Now().UnixNano())}
156                 a.activeAlarms = append(a.activeAlarms, histAlarmMessage)
157                 a.alarmHistory = append(a.alarmHistory, histAlarmMessage)
158         }
159
160         // @todo: For now just keep the alarms (both active and history) in-memory. Use SDL later for persistence
161         a.activeAlarms = append(a.activeAlarms, *newAlarm)
162         a.alarmHistory = append(a.alarmHistory, *newAlarm)
163 }
164
165 func (a *AlarmManager) GenerateAlertLabels(newAlarm alarm.Alarm, status AlertStatus, alarmTime int64) (models.LabelSet, models.LabelSet) {
166         alarmDef := alarm.RICAlarmDefinitions[newAlarm.SpecificProblem]
167         amLabels := models.LabelSet{
168                 "status":      string(status),
169                 "alertname":   alarmDef.AlarmText,
170                 "severity":    string(newAlarm.PerceivedSeverity),
171                 "service":     fmt.Sprintf("%s:%s", newAlarm.ManagedObjectId, newAlarm.ApplicationId),
172                 "system_name": fmt.Sprintf("RIC:%s:%s", newAlarm.ManagedObjectId, newAlarm.ApplicationId),
173         }
174         amAnnotations := models.LabelSet{
175                 "alarm_id":        fmt.Sprintf("%d", alarmDef.AlarmId),
176                 "description":     fmt.Sprintf("%d:%s:%s", newAlarm.SpecificProblem, newAlarm.IdentifyingInfo, newAlarm.AdditionalInfo),
177                 "additional_info": newAlarm.AdditionalInfo,
178                 "summary":         alarmDef.EventType,
179                 "instructions":    alarmDef.OperationInstructions,
180                 "timestamp":       fmt.Sprintf("%s", time.Unix(0, alarmTime).Format("02/01/2006, 15:04:05")),
181         }
182
183         return amLabels, amAnnotations
184 }
185
186 func (a *AlarmManager) NewAlertmanagerClient() *client.Alertmanager {
187         cr := clientruntime.New(a.amHost, a.amBaseUrl, a.amSchemes)
188         return client.New(cr, strfmt.Default)
189 }
190
191 func (a *AlarmManager) PostAlert(amLabels, amAnnotations models.LabelSet) (*alert.PostAlertsOK, error) {
192         pa := &models.PostableAlert{
193                 Alert: models.Alert{
194                         GeneratorURL: strfmt.URI(""),
195                         Labels:       amLabels,
196                 },
197                 Annotations: amAnnotations,
198         }
199         alertParams := alert.NewPostAlertsParams().WithAlerts(models.PostableAlerts{pa})
200
201         app.Logger.Info("Posting alerts: labels: %+v, annotations: %+v", amLabels, amAnnotations)
202         ok, err := a.NewAlertmanagerClient().Alert.PostAlerts(alertParams)
203         if err != nil {
204                 app.Logger.Error("Posting alerts to '%s/%s' failed with error: %v", a.amHost, a.amBaseUrl, err)
205         }
206         return ok, err
207 }
208
209 func (a *AlarmManager) StatusCB() bool {
210         if !a.rmrReady {
211                 app.Logger.Info("RMR not ready yet!")
212         }
213
214         return a.rmrReady
215 }
216
217 func (a *AlarmManager) ConfigChangeCB(configparam string) {
218
219         a.maxActiveAlarms = app.Config.GetInt("controls.maxActiveAlarms")
220         a.maxAlarmHistory = app.Config.GetInt("controls.maxAlarmHistory")
221
222         app.Logger.Debug("ConfigChangeCB: maxActiveAlarms %v", a.maxActiveAlarms)
223         app.Logger.Debug("ConfigChangeCB: maxAlarmHistory = %v", a.maxAlarmHistory)
224
225         return
226 }
227
228 func (a *AlarmManager) Run(sdlcheck bool) {
229         app.Logger.SetMdc("alarmManager", fmt.Sprintf("%s:%s", Version, Hash))
230         app.SetReadyCB(func(d interface{}) { a.rmrReady = true }, true)
231         app.Resource.InjectStatusCb(a.StatusCB)
232         app.AddConfigChangeListener(a.ConfigChangeCB)
233
234         alarm.RICAlarmDefinitions = make(map[int]*alarm.AlarmDefinition)
235
236         app.Resource.InjectRoute("/ric/v1/alarms", a.RaiseAlarm, "POST")
237         app.Resource.InjectRoute("/ric/v1/alarms", a.ClearAlarm, "DELETE")
238         app.Resource.InjectRoute("/ric/v1/alarms/active", a.GetActiveAlarms, "GET")
239         app.Resource.InjectRoute("/ric/v1/alarms/history", a.GetAlarmHistory, "GET")
240         app.Resource.InjectRoute("/ric/v1/alarms/config", a.SetAlarmConfig, "POST")
241         app.Resource.InjectRoute("/ric/v1/alarms/config", a.GetAlarmConfig, "GET")
242         app.Resource.InjectRoute("/ric/v1/alarms/define", a.SetAlarmDefinition, "POST")
243         app.Resource.InjectRoute("/ric/v1/alarms/define/{alarmId}", a.DeleteAlarmDefinition, "DELETE")
244         app.Resource.InjectRoute("/ric/v1/alarms/define", a.GetAlarmDefinition, "GET")
245         app.Resource.InjectRoute("/ric/v1/alarms/define/{alarmId}", a.GetAlarmDefinition, "GET")
246
247         // Start background timer for re-raising alerts
248         a.postClear = sdlcheck
249         go a.StartAlertTimer()
250         a.alarmClient, _ = alarm.InitAlarm("SEP", "ALARMMANAGER")
251
252         app.RunWithParams(a, sdlcheck)
253 }
254
255 func NewAlarmManager(amHost string, alertInterval int) *AlarmManager {
256         if alertInterval == 0 {
257                 alertInterval = viper.GetInt("controls.promAlertManager.alertInterval")
258         }
259
260         if amHost == "" {
261                 amHost = viper.GetString("controls.promAlertManager.address")
262         }
263
264         return &AlarmManager{
265                 rmrReady:        false,
266                 amHost:          amHost,
267                 amBaseUrl:       viper.GetString("controls.promAlertManager.baseUrl"),
268                 amSchemes:       []string{viper.GetString("controls.promAlertManager.schemes")},
269                 alertInterval:   alertInterval,
270                 activeAlarms:    make([]alarm.AlarmMessage, 0),
271                 alarmHistory:    make([]alarm.AlarmMessage, 0),
272                 maxActiveAlarms: app.Config.GetInt("controls.maxActiveAlarms"),
273                 maxAlarmHistory: app.Config.GetInt("controls.maxAlarmHistory"),
274         }
275 }
276
277 // Main function
278 func main() {
279         NewAlarmManager("", 0).Run(true)
280 }