adding robot unit testcases for alarmgo, cli based testing
[ric-plt/alarm-go.git] / manager / cmd / manager.go
1 /*
2  *  Copyright (c) 2020 AT&T Intellectual Property.
3  *  Copyright (c) 2020 Nokia.
4  *
5  *  Licensed under the Apache License, Version 2.0 (the "License");
6  *  you may not use this file except in compliance with the License.
7  *  You may obtain a copy of the License at
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  *  Unless required by applicable law or agreed to in writing, software
12  *  distributed under the License is distributed on an "AS IS" BASIS,
13  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  *  See the License for the specific language governing permissions and
15  *  limitations under the License.
16  *
17  * This source code is part of the near-RT RIC (RAN Intelligent Controller)
18  * platform project (RICP).
19  */
20
21 package main
22
23 import (
24         "encoding/json"
25         "fmt"
26         "gerrit.o-ran-sc.org/r/ric-plt/alarm-go/alarm"
27         app "gerrit.o-ran-sc.org/r/ric-plt/xapp-frame/pkg/xapp"
28         clientruntime "github.com/go-openapi/runtime/client"
29         "github.com/go-openapi/strfmt"
30         "github.com/prometheus/alertmanager/api/v2/client"
31         "github.com/prometheus/alertmanager/api/v2/client/alert"
32         "github.com/prometheus/alertmanager/api/v2/models"
33         "github.com/spf13/viper"
34         "io/ioutil"
35         "os"
36         "time"
37 )
38
39 func (a *AlarmManager) StartAlertTimer() {
40         tick := time.Tick(time.Duration(a.alertInterval) * time.Millisecond)
41         for range tick {
42                 a.mutex.Lock()
43                 for _, m := range a.activeAlarms {
44                         app.Logger.Info("Re-raising alarm: %v", m)
45                         a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive, m.AlarmTime))
46                 }
47                 a.mutex.Unlock()
48         }
49 }
50
51 func (a *AlarmManager) Consume(rp *app.RMRParams) (err error) {
52         app.Logger.Info("Message received!")
53
54         defer app.Rmr.Free(rp.Mbuf)
55         switch rp.Mtype {
56         case alarm.RIC_ALARM_UPDATE:
57                 a.HandleAlarms(rp)
58         default:
59                 app.Logger.Info("Unknown Message Type '%d', discarding", rp.Mtype)
60         }
61
62         return nil
63 }
64
65 func (a *AlarmManager) HandleAlarms(rp *app.RMRParams) (*alert.PostAlertsOK, error) {
66         var m alarm.AlarmMessage
67         app.Logger.Info("Received JSON: %s", rp.Payload)
68         if err := json.Unmarshal(rp.Payload, &m); err != nil {
69                 app.Logger.Error("json.Unmarshal failed: %v", err)
70                 return nil, err
71         }
72         app.Logger.Info("newAlarm: %v", m)
73
74         return a.ProcessAlarm(&m)
75 }
76
77 func (a *AlarmManager) ProcessAlarm(m *alarm.AlarmMessage) (*alert.PostAlertsOK, error) {
78         if _, ok := alarm.RICAlarmDefinitions[m.Alarm.SpecificProblem]; !ok {
79                 app.Logger.Warn("Alarm (SP='%d') not recognized, suppressing ...", m.Alarm.SpecificProblem)
80                 return nil, nil
81         }
82
83         // Suppress duplicate alarms
84         idx, found := a.IsMatchFound(m.Alarm)
85         if found && m.AlarmAction == alarm.AlarmActionRaise {
86                 app.Logger.Info("Duplicate alarm found, suppressing ...")
87                 if m.PerceivedSeverity == a.activeAlarms[idx].PerceivedSeverity {
88                         // Duplicate with same severity found
89                         return nil, nil
90                 } else {
91                         // Remove duplicate with different severity
92                         a.activeAlarms = a.RemoveAlarm(a.activeAlarms, idx, "active")
93                 }
94         }
95
96         // Clear alarm if found from active alarm list
97         if m.AlarmAction == alarm.AlarmActionClear {
98                 if found {
99                         a.alarmHistory = append(a.alarmHistory, *m)
100                         a.activeAlarms = a.RemoveAlarm(a.activeAlarms, idx, "active")
101                         if len(a.alarmHistory) >= a.maxAlarmHistory {
102                                 app.Logger.Error("alarm history count exceeded maxAlarmHistory threshold")
103                                 histAlarm := a.alarmClient.NewAlarm(alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD, alarm.SeverityWarning, "threshold", "history")
104                                 histAlarmMessage := alarm.AlarmMessage{Alarm: histAlarm, AlarmAction: alarm.AlarmActionRaise, AlarmTime: (time.Now().UnixNano())}
105                                 a.activeAlarms = append(a.activeAlarms, histAlarmMessage)
106                                 a.alarmHistory = append(a.alarmHistory, histAlarmMessage)
107                         }
108                         if a.postClear {
109                                 return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusResolved, m.AlarmTime))
110                         }
111                 }
112                 app.Logger.Info("No matching active alarm found, suppressing ...")
113                 return nil, nil
114         }
115
116         // New alarm -> update active alarms and post to Alert Manager
117         if m.AlarmAction == alarm.AlarmActionRaise {
118                 a.UpdateAlarmLists(m)
119                 return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive, m.AlarmTime))
120         }
121
122         return nil, nil
123 }
124
125 func (a *AlarmManager) IsMatchFound(newAlarm alarm.Alarm) (int, bool) {
126         for i, m := range a.activeAlarms {
127                 if m.ManagedObjectId == newAlarm.ManagedObjectId && m.ApplicationId == newAlarm.ApplicationId &&
128                         m.SpecificProblem == newAlarm.SpecificProblem && m.IdentifyingInfo == newAlarm.IdentifyingInfo {
129                         return i, true
130                 }
131         }
132         return -1, false
133 }
134
135 func (a *AlarmManager) RemoveAlarm(alarms []alarm.AlarmMessage, i int, listName string) []alarm.AlarmMessage {
136         a.mutex.Lock()
137         defer a.mutex.Unlock()
138
139         app.Logger.Info("Alarm '%+v' deleted from the '%s' list", alarms[i], listName)
140         copy(alarms[i:], alarms[i+1:])
141         return alarms[:len(alarms)-1]
142 }
143
144 func (a *AlarmManager) UpdateAlarmLists(newAlarm *alarm.AlarmMessage) {
145         a.mutex.Lock()
146         defer a.mutex.Unlock()
147
148         /* If maximum number of active alarms is reached, an error log writing is made, and new alarm indicating the problem is raised.
149            The attempt to raise the alarm next time will be supressed when found as duplicate. */
150         if len(a.activeAlarms) >= a.maxActiveAlarms {
151                 app.Logger.Error("active alarm count exceeded maxActiveAlarms threshold")
152                 actAlarm := a.alarmClient.NewAlarm(alarm.ACTIVE_ALARM_EXCEED_MAX_THRESHOLD, alarm.SeverityWarning, "threshold", "active")
153                 actAlarmMessage := alarm.AlarmMessage{Alarm: actAlarm, AlarmAction: alarm.AlarmActionRaise, AlarmTime: (time.Now().UnixNano())}
154                 a.activeAlarms = append(a.activeAlarms, actAlarmMessage)
155                 a.alarmHistory = append(a.alarmHistory, actAlarmMessage)
156         }
157
158         if len(a.alarmHistory) >= a.maxAlarmHistory {
159                 app.Logger.Error("alarm history count exceeded maxAlarmHistory threshold")
160                 histAlarm := a.alarmClient.NewAlarm(alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD, alarm.SeverityWarning, "threshold", "history")
161                 histAlarmMessage := alarm.AlarmMessage{Alarm: histAlarm, AlarmAction: alarm.AlarmActionRaise, AlarmTime: (time.Now().UnixNano())}
162                 a.activeAlarms = append(a.activeAlarms, histAlarmMessage)
163                 a.alarmHistory = append(a.alarmHistory, histAlarmMessage)
164         }
165
166         // @todo: For now just keep the alarms (both active and history) in-memory. Use SDL later for persistence
167         a.activeAlarms = append(a.activeAlarms, *newAlarm)
168         a.alarmHistory = append(a.alarmHistory, *newAlarm)
169 }
170
171 func (a *AlarmManager) GenerateAlertLabels(newAlarm alarm.Alarm, status AlertStatus, alarmTime int64) (models.LabelSet, models.LabelSet) {
172         alarmDef := alarm.RICAlarmDefinitions[newAlarm.SpecificProblem]
173         amLabels := models.LabelSet{
174                 "status":      string(status),
175                 "alertname":   alarmDef.AlarmText,
176                 "severity":    string(newAlarm.PerceivedSeverity),
177                 "service":     fmt.Sprintf("%s:%s", newAlarm.ManagedObjectId, newAlarm.ApplicationId),
178                 "system_name": fmt.Sprintf("RIC:%s:%s", newAlarm.ManagedObjectId, newAlarm.ApplicationId),
179         }
180         amAnnotations := models.LabelSet{
181                 "alarm_id":        fmt.Sprintf("%d", alarmDef.AlarmId),
182                 "description":     fmt.Sprintf("%d:%s:%s", newAlarm.SpecificProblem, newAlarm.IdentifyingInfo, newAlarm.AdditionalInfo),
183                 "additional_info": newAlarm.AdditionalInfo,
184                 "summary":         alarmDef.EventType,
185                 "instructions":    alarmDef.OperationInstructions,
186                 "timestamp":       fmt.Sprintf("%s", time.Unix(0, alarmTime).Format("02/01/2006, 15:04:05")),
187         }
188
189         return amLabels, amAnnotations
190 }
191
192 func (a *AlarmManager) NewAlertmanagerClient() *client.Alertmanager {
193         cr := clientruntime.New(a.amHost, a.amBaseUrl, a.amSchemes)
194         return client.New(cr, strfmt.Default)
195 }
196
197 func (a *AlarmManager) PostAlert(amLabels, amAnnotations models.LabelSet) (*alert.PostAlertsOK, error) {
198         pa := &models.PostableAlert{
199                 Alert: models.Alert{
200                         GeneratorURL: strfmt.URI(""),
201                         Labels:       amLabels,
202                 },
203                 Annotations: amAnnotations,
204         }
205         alertParams := alert.NewPostAlertsParams().WithAlerts(models.PostableAlerts{pa})
206
207         app.Logger.Info("Posting alerts: labels: %+v, annotations: %+v", amLabels, amAnnotations)
208         ok, err := a.NewAlertmanagerClient().Alert.PostAlerts(alertParams)
209         if err != nil {
210                 app.Logger.Error("Posting alerts to '%s/%s' failed with error: %v", a.amHost, a.amBaseUrl, err)
211         }
212         return ok, err
213 }
214
215 func (a *AlarmManager) StatusCB() bool {
216         if !a.rmrReady {
217                 app.Logger.Info("RMR not ready yet!")
218         }
219
220         return a.rmrReady
221 }
222
223 func (a *AlarmManager) ConfigChangeCB(configparam string) {
224
225         a.maxActiveAlarms = app.Config.GetInt("controls.maxActiveAlarms")
226         a.maxAlarmHistory = app.Config.GetInt("controls.maxAlarmHistory")
227         a.alertInterval = viper.GetInt("controls.promAlertManager.alertInterval")
228         a.amHost = viper.GetString("controls.promAlertManager.address")
229
230         app.Logger.Debug("ConfigChangeCB: maxActiveAlarms %v", a.maxActiveAlarms)
231         app.Logger.Debug("ConfigChangeCB: maxAlarmHistory = %v", a.maxAlarmHistory)
232         app.Logger.Debug("ConfigChangeCB: alertInterval %v", a.alertInterval)
233         app.Logger.Debug("ConfigChangeCB: amHost = %v", a.amHost)
234
235         return
236 }
237
238 func (a *AlarmManager) ReadAlarmDefinitionFromJson() {
239
240         filename := os.Getenv("DEF_FILE")
241         file, err := ioutil.ReadFile(filename)
242         if err == nil {
243                 data := RicAlarmDefinitions{}
244                 err = json.Unmarshal([]byte(file), &data)
245                 if err == nil {
246                         for _, alarmDefinition := range data.AlarmDefinitions {
247                                 _, exists := alarm.RICAlarmDefinitions[alarmDefinition.AlarmId]
248                                 if exists {
249                                         app.Logger.Error("ReadAlarmDefinitionFromJson: alarm definition already exists for %v", alarmDefinition.AlarmId)
250                                 } else {
251                                         app.Logger.Debug("ReadAlarmDefinitionFromJson: alarm  %v", alarmDefinition.AlarmId)
252                                         ricAlarmDefintion := new(alarm.AlarmDefinition)
253                                         ricAlarmDefintion.AlarmId = alarmDefinition.AlarmId
254                                         ricAlarmDefintion.AlarmText = alarmDefinition.AlarmText
255                                         ricAlarmDefintion.EventType = alarmDefinition.EventType
256                                         ricAlarmDefintion.OperationInstructions = alarmDefinition.OperationInstructions
257                                         alarm.RICAlarmDefinitions[alarmDefinition.AlarmId] = ricAlarmDefintion
258                                 }
259                         }
260                 } else {
261                         app.Logger.Error("json.Unmarshal failed with error %v", err)
262                 }
263         } else {
264                 app.Logger.Error("ioutil.ReadFile failed with error %v", err)
265         }
266 }
267
268 func (a *AlarmManager) Run(sdlcheck bool) {
269         app.Logger.SetMdc("alarmManager", fmt.Sprintf("%s:%s", Version, Hash))
270         app.SetReadyCB(func(d interface{}) { a.rmrReady = true }, true)
271         app.Resource.InjectStatusCb(a.StatusCB)
272         app.AddConfigChangeListener(a.ConfigChangeCB)
273
274         alarm.RICAlarmDefinitions = make(map[int]*alarm.AlarmDefinition)
275         a.ReadAlarmDefinitionFromJson()
276
277         app.Resource.InjectRoute("/ric/v1/alarms", a.RaiseAlarm, "POST")
278         app.Resource.InjectRoute("/ric/v1/alarms", a.ClearAlarm, "DELETE")
279         app.Resource.InjectRoute("/ric/v1/alarms/active", a.GetActiveAlarms, "GET")
280         app.Resource.InjectRoute("/ric/v1/alarms/history", a.GetAlarmHistory, "GET")
281         app.Resource.InjectRoute("/ric/v1/alarms/config", a.SetAlarmConfig, "POST")
282         app.Resource.InjectRoute("/ric/v1/alarms/config", a.GetAlarmConfig, "GET")
283         app.Resource.InjectRoute("/ric/v1/alarms/define", a.SetAlarmDefinition, "POST")
284         app.Resource.InjectRoute("/ric/v1/alarms/define/{alarmId}", a.DeleteAlarmDefinition, "DELETE")
285         app.Resource.InjectRoute("/ric/v1/alarms/define", a.GetAlarmDefinition, "GET")
286         app.Resource.InjectRoute("/ric/v1/alarms/define/{alarmId}", a.GetAlarmDefinition, "GET")
287
288         // Start background timer for re-raising alerts
289         a.postClear = sdlcheck
290         go a.StartAlertTimer()
291         a.alarmClient, _ = alarm.InitAlarm("SEP", "ALARMMANAGER")
292
293         app.RunWithParams(a, sdlcheck)
294 }
295
296 func NewAlarmManager(amHost string, alertInterval int) *AlarmManager {
297         if alertInterval == 0 {
298                 alertInterval = viper.GetInt("controls.promAlertManager.alertInterval")
299         }
300
301         if amHost == "" {
302                 amHost = viper.GetString("controls.promAlertManager.address")
303         }
304
305         return &AlarmManager{
306                 rmrReady:        false,
307                 amHost:          amHost,
308                 amBaseUrl:       viper.GetString("controls.promAlertManager.baseUrl"),
309                 amSchemes:       []string{viper.GetString("controls.promAlertManager.schemes")},
310                 alertInterval:   alertInterval,
311                 activeAlarms:    make([]alarm.AlarmMessage, 0),
312                 alarmHistory:    make([]alarm.AlarmMessage, 0),
313                 maxActiveAlarms: app.Config.GetInt("controls.maxActiveAlarms"),
314                 maxAlarmHistory: app.Config.GetInt("controls.maxAlarmHistory"),
315         }
316 }
317
318 // Main function
319 func main() {
320         NewAlarmManager("", 0).Run(true)
321 }