Bumping docker image version
[ric-plt/alarm-go.git] / manager / cmd / manager.go
1 /*
2  *  Copyright (c) 2020 AT&T Intellectual Property.
3  *  Copyright (c) 2020 Nokia.
4  *
5  *  Licensed under the Apache License, Version 2.0 (the "License");
6  *  you may not use this file except in compliance with the License.
7  *  You may obtain a copy of the License at
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  *  Unless required by applicable law or agreed to in writing, software
12  *  distributed under the License is distributed on an "AS IS" BASIS,
13  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  *  See the License for the specific language governing permissions and
15  *  limitations under the License.
16  *
17  * This source code is part of the near-RT RIC (RAN Intelligent Controller)
18  * platform project (RICP).
19  */
20
21 package main
22
23 import (
24         "encoding/json"
25         "fmt"
26         "time"
27
28         clientruntime "github.com/go-openapi/runtime/client"
29         "github.com/go-openapi/strfmt"
30         "github.com/prometheus/alertmanager/api/v2/client"
31         "github.com/prometheus/alertmanager/api/v2/client/alert"
32         "github.com/prometheus/alertmanager/api/v2/models"
33         "github.com/spf13/viper"
34
35         "gerrit.o-ran-sc.org/r/ric-plt/alarm-go/alarm"
36         app "gerrit.o-ran-sc.org/r/ric-plt/xapp-frame/pkg/xapp"
37 )
38
39 func (a *AlarmManager) StartAlertTimer() {
40         tick := time.Tick(time.Duration(a.alertInterval) * time.Millisecond)
41         for range tick {
42                 a.mutex.Lock()
43                 for _, m := range a.activeAlarms {
44                         app.Logger.Info("Re-raising alarm: %v", m)
45                         a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive))
46                 }
47                 a.mutex.Unlock()
48         }
49 }
50
51 func (a *AlarmManager) Consume(rp *app.RMRParams) (err error) {
52         app.Logger.Info("Message received!")
53
54         defer app.Rmr.Free(rp.Mbuf)
55         switch rp.Mtype {
56         case alarm.RIC_ALARM_UPDATE:
57                 a.HandleAlarms(rp)
58         default:
59                 app.Logger.Info("Unknown Message Type '%d', discarding", rp.Mtype)
60         }
61
62         return nil
63 }
64
65 func (a *AlarmManager) HandleAlarms(rp *app.RMRParams) (*alert.PostAlertsOK, error) {
66         var m alarm.AlarmMessage
67         app.Logger.Info("Received JSON: %s", rp.Payload)
68         if err := json.Unmarshal(rp.Payload, &m); err != nil {
69                 app.Logger.Error("json.Unmarshal failed: %v", err)
70                 return nil, err
71         }
72         app.Logger.Info("newAlarm: %v", m)
73
74         return a.ProcessAlarm(&m)
75 }
76
77 func (a *AlarmManager) ProcessAlarm(m *alarm.AlarmMessage) (*alert.PostAlertsOK, error) {
78         if _, ok := alarm.RICAlarmDefinitions[m.Alarm.SpecificProblem]; !ok {
79                 app.Logger.Warn("Alarm (SP='%d') not recognized, suppressing ...", m.Alarm.SpecificProblem)
80                 return nil, nil
81         }
82
83         // Suppress duplicate alarms
84         idx, found := a.IsMatchFound(m.Alarm)
85         if found && m.AlarmAction != alarm.AlarmActionClear {
86                 app.Logger.Info("Duplicate alarm found, suppressing ...")
87                 return nil, nil
88         }
89
90         // Clear alarm if found from active alarm list
91         if m.AlarmAction == alarm.AlarmActionClear {
92                 if found {
93                         a.alarmHistory = append(a.alarmHistory, *m)
94                         a.activeAlarms = a.RemoveAlarm(a.activeAlarms, idx, "active")
95
96                         if a.postClear {
97                                 return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusResolved))
98                         }
99                 }
100                 app.Logger.Info("No matching active alarm found, suppressing ...")
101                 return nil, nil
102         }
103
104         // New alarm -> update active alarms and post to Alert Manager
105         if m.AlarmAction == alarm.AlarmActionRaise {
106                 a.UpdateAlarmLists(m)
107                 return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive))
108         }
109
110         return nil, nil
111 }
112
113 func (a *AlarmManager) IsMatchFound(newAlarm alarm.Alarm) (int, bool) {
114         for i, m := range a.activeAlarms {
115                 if m.ManagedObjectId == newAlarm.ManagedObjectId && m.ApplicationId == newAlarm.ApplicationId &&
116                         m.SpecificProblem == newAlarm.SpecificProblem && m.IdentifyingInfo == newAlarm.IdentifyingInfo {
117                         return i, true
118                 }
119         }
120         return -1, false
121 }
122
123 func (a *AlarmManager) RemoveAlarm(alarms []alarm.AlarmMessage, i int, listName string) []alarm.AlarmMessage {
124         a.mutex.Lock()
125         defer a.mutex.Unlock()
126
127         app.Logger.Info("Alarm '%+v' deleted from the '%s' list", alarms[i], listName)
128         copy(alarms[i:], alarms[i+1:])
129         return alarms[:len(alarms)-1]
130 }
131
132 func (a *AlarmManager) UpdateAlarmLists(newAlarm *alarm.AlarmMessage) {
133         a.mutex.Lock()
134         defer a.mutex.Unlock()
135
136         // If maximum number of active alarms is reached, purge the oldest alarm
137         if len(a.activeAlarms) >= viper.GetInt("controls.maxActiveAlarms") {
138                 a.activeAlarms = a.RemoveAlarm(a.activeAlarms, 0, "active")
139         }
140
141         if len(a.alarmHistory) >= viper.GetInt("controls.maxAlarmHistory") {
142                 a.alarmHistory = a.RemoveAlarm(a.alarmHistory, 0, "history")
143         }
144
145         // @todo: For now just keep the alarms (both active and history) in-memory. Use SDL later for persistence
146         a.activeAlarms = append(a.activeAlarms, *newAlarm)
147         a.alarmHistory = append(a.alarmHistory, *newAlarm)
148 }
149
150 func (a *AlarmManager) GenerateAlertLabels(newAlarm alarm.Alarm, status AlertStatus) (models.LabelSet, models.LabelSet) {
151         alarmDef := alarm.RICAlarmDefinitions[newAlarm.SpecificProblem]
152         amLabels := models.LabelSet{
153                 "status":      string(status),
154                 "alertname":   alarmDef.AlarmText,
155                 "severity":    string(newAlarm.PerceivedSeverity),
156                 "service":     fmt.Sprintf("%s:%s", newAlarm.ManagedObjectId, newAlarm.ApplicationId),
157                 "system_name": fmt.Sprintf("RIC:%s:%s", newAlarm.ManagedObjectId, newAlarm.ApplicationId),
158         }
159         amAnnotations := models.LabelSet{
160                 "alarm_id":        fmt.Sprintf("%d", alarmDef.AlarmId),
161                 "description":     fmt.Sprintf("%d:%s:%s", newAlarm.SpecificProblem, newAlarm.IdentifyingInfo, newAlarm.AdditionalInfo),
162                 "additional_info": newAlarm.AdditionalInfo,
163                 "summary":         alarmDef.EventType,
164                 "instructions":    alarmDef.OperationInstructions,
165         }
166
167         return amLabels, amAnnotations
168 }
169
170 func (a *AlarmManager) NewAlertmanagerClient() *client.Alertmanager {
171         cr := clientruntime.New(a.amHost, a.amBaseUrl, a.amSchemes)
172         return client.New(cr, strfmt.Default)
173 }
174
175 func (a *AlarmManager) PostAlert(amLabels, amAnnotations models.LabelSet) (*alert.PostAlertsOK, error) {
176         pa := &models.PostableAlert{
177                 Alert: models.Alert{
178                         GeneratorURL: strfmt.URI(""),
179                         Labels:       amLabels,
180                 },
181                 Annotations: amAnnotations,
182         }
183         alertParams := alert.NewPostAlertsParams().WithAlerts(models.PostableAlerts{pa})
184
185         app.Logger.Info("Posting alerts: labels: %+v, annotations: %+v", amLabels, amAnnotations)
186         ok, err := a.NewAlertmanagerClient().Alert.PostAlerts(alertParams)
187         if err != nil {
188                 app.Logger.Error("Posting alerts to '%s/%s' failed with error: %v", a.amHost, a.amBaseUrl, err)
189         }
190         return ok, err
191 }
192
193 func (a *AlarmManager) StatusCB() bool {
194         if !a.rmrReady {
195                 app.Logger.Info("RMR not ready yet!")
196         }
197
198         return a.rmrReady
199 }
200
201 func (a *AlarmManager) Run(sdlcheck bool) {
202         app.Logger.SetMdc("alarmManager", fmt.Sprintf("%s:%s", Version, Hash))
203         app.SetReadyCB(func(d interface{}) { a.rmrReady = true }, true)
204         app.Resource.InjectStatusCb(a.StatusCB)
205
206         app.Resource.InjectRoute("/ric/v1/alarms", a.RaiseAlarm, "POST")
207         app.Resource.InjectRoute("/ric/v1/alarms", a.ClearAlarm, "DELETE")
208         app.Resource.InjectRoute("/ric/v1/alarms/active", a.GetActiveAlarms, "GET")
209         app.Resource.InjectRoute("/ric/v1/alarms/history", a.GetAlarmHistory, "GET")
210
211         // Start background timer for re-raising alerts
212         a.postClear = sdlcheck
213         go a.StartAlertTimer()
214
215         app.RunWithParams(a, sdlcheck)
216 }
217
218 func NewAlarmManager(amHost string, alertInterval int) *AlarmManager {
219         if alertInterval == 0 {
220                 alertInterval = viper.GetInt("controls.promAlertManager.alertInterval")
221         }
222
223         if amHost == "" {
224                 amHost = viper.GetString("controls.promAlertManager.address")
225         }
226
227         return &AlarmManager{
228                 rmrReady:      false,
229                 amHost:        amHost,
230                 amBaseUrl:     viper.GetString("controls.promAlertManager.baseUrl"),
231                 amSchemes:     []string{viper.GetString("controls.promAlertManager.schemes")},
232                 alertInterval: alertInterval,
233                 activeAlarms:  make([]alarm.AlarmMessage, 0),
234                 alarmHistory:  make([]alarm.AlarmMessage, 0),
235         }
236 }
237
238 // Main function
239 func main() {
240         NewAlarmManager("", 0).Run(true)
241 }