Fix the 2.0.3 version image pull failed
[pti/o2.git] / configs / events.yaml
1 ---
2
3 #
4 # Copyright (c) 2013-2021 Wind River Systems, Inc.
5 #
6 # SPDX-License-Identifier: Apache-2.0
7 #
8
9 ############################################################################
10 #
11 # ALARM & CUSTOMER LOG DOCUMENTATION
12 #
13 ############################################################################
14
15 ############################################################################
16 #
17 # Record Format ... for documentation
18 #
19 # 100.001:
20 #   Type: < Alarm |  Log >
21 #   Description: < yaml string >
22 #                OR
23 #                [ < yaml string >,      // list of yaml strings
24 #                  < yaml string >  ]
25 #                OR
26 #                critical: < yaml string >     // i.e. dictionary of yaml strings indexed by severity
27 #                major:    < yaml string >
28 #                minor:    < yaml string >
29 #                warning:  < yaml string >
30 #   Entity_Instance_ID: < yaml string ... e.g. host=<hostname>.interface=<ifname> >
31 #                       OR
32 #                       [ < yaml string >,      // list of yaml strings
33 #                         < yaml string >  ]
34 #   Severity: < critical |  major |  minor |  warning >
35 #                       OR
36 #                       [ critical, major ]      // list of severity values
37 #   Proposed_Repair_Action: < yaml string >      // NOTE ALARM ONLY FIELD
38 #                           OR
39 #                           critical: < yaml string >     // i.e. dictionary of yaml strings indexed by severity
40 #                           major:    < yaml string >
41 #                           minor:    < yaml string >
42 #                           warning:  < yaml string >
43 #   Maintenance_Action: < yaml string >          // NOTE ALARM ONLY FIELD
44 #                       OR
45 #                       critical: < yaml string >     // i.e. dictionary of yaml strings indexed by severity
46 #                       major:    < yaml string >
47 #                       minor:    < yaml string >
48 #                       warning:  < yaml string >
49 #   Inhibit_Alarms: < True | False >            // NOTE ALARM ONLY FIELD
50 #   Alarm_Type: < operational-violation | ... >
51 #   Probable_Cause: < timing-problem | ... >
52 #                   OR
53 #                   [ < timing-problem | ... >,      // list of probable-causes
54 #                     < timing-problem | ... >  ]
55 #   Service_Affecting: < True | False >
56 #   Suppression: < True | False >               // NOTE ALARM ONLY FIELD
57 #   Management_Affecting_Severity: < none | critical | major | minor | warning >
58 #       // lowest alarm level of this type that will block forced upgrades & orchestration actions
59 #   Degrade_Affecting_Severity: < none | critical | major | minor >
60 #       // lowest alarm level of this type sets a host to 'degraded'
61 #
62 #
63 #   Other Notes:
64 #      - use general record format above
65 #      - the only dictionaries allowed are ones indexed by severity
66 #      - if there are multiple lists in a record,
67 #        then they should all have the same # of items and corresponding list items represent instance of alarm
68 #      - if you can't describe the alarm/log based on the above rules,
69 #        then you can use a multi-line string format
70 #      - DELETING alarms from events.yaml: alarms should only be deleted when going to a new Titanium Cloud release
71 #      - if all possible alarm severities are mgmt affecting, the convention is to
72 #        use 'warning' as the Management_Affecting_Severity, even if warning is not a possible severity for that alarm
73 #
74 #   Testing:
75 #      - Testing of events.yaml can be done by running regular make command
76 #        and specifying fm-doc:
77 #                nice -n 20 ionice -c Idle make -C build fm-doc.rebuild
78 #      - When building, events.yaml will be parsed for correct format, and also
79 #        to ensure that Alarm IDs defined in constants.py and fmAlarm.h are
80 #        listed in events.yaml
81 #
82 ############################################################################
83
84
85 #---------------------------------------------------------------------------
86 #   Monitored Resource Alarms
87 #---------------------------------------------------------------------------
88
89
90 100.101:    # Platform CPU usage threshold alarm (critical at 95%, major at 90%)
91     Type: Alarm
92     Description: |-
93         Platform CPU threshold exceeded; threshold x%, actual y% .
94              CRITICAL @ 95%
95              MAJOR    @ 90%
96     Entity_Instance_ID: host=<hostname>
97     Severity: [critical, major]
98     Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support."
99     Maintenance_Action:
100         critical: degrade
101         major: degrade
102     Inhibit_Alarms:
103     Alarm_Type: operational-violation
104     Probable_Cause: threshold-crossed
105     Service_Affecting: False
106     Suppression: True
107     Management_Affecting_Severity: major
108     Degrade_Affecting_Severity: critical
109
110 100.102:    # VSwitch CPU usage threshold alarm (critical at 95%, major at 90%, minor at 80%)
111     Type: Alarm
112     Description: |-
113         VSwitch CPU threshold exceeded; threshold x%, actual y% .
114              CRITICAL @ 95%
115              MAJOR    @ 90%
116              MINOR    @ 80%
117     Entity_Instance_ID: host=<hostname>
118     Severity: [critical, major, minor]
119     Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support."
120     Maintenance_Action:
121         critical: degrade
122         major: degrade
123     Inhibit_Alarms:
124     Alarm_Type: operational-violation
125     Probable_Cause: threshold-crossed
126     Service_Affecting: False
127     Suppression: True
128     Management_Affecting_Severity: none
129     Degrade_Affecting_Severity: none
130
131 100.103:    # Memory usage threshold alarm; entity may be the host, total/platform memory, or a NUMA node
132     Type: Alarm
133     Description: |-
134         Memory threshold exceeded; threshold x%, actual y% .
135              CRITICAL @ 90%
136              MAJOR    @ 80%
137     Entity_Instance_ID: |-
138         host=<hostname>
139         OR
140         host=<hostname>.memory=total
141         OR
142         host=<hostname>.memory=platform
143         OR
144         host=<hostname>.numa=node<number>
145     Severity: [critical, major]
146     Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support; may require additional memory on Host."
147     Maintenance_Action:
148         critical: degrade
149         major: degrade
150     Inhibit_Alarms:
151     Alarm_Type: operational-violation
152     Probable_Cause: threshold-crossed
153     Service_Affecting: False
154     Suppression: True
155     Management_Affecting_Severity: none
156     Degrade_Affecting_Severity: critical
157
158 100.104:    # NOTE This should really be split into two different Alarms (filesystem usage vs. volume group).
159     Type: Alarm
160     Description: |-
161         host=<hostname>.filesystem=<mount-dir>
162             File System threshold exceeded; threshold x%, actual y% .
163                 CRITICAL @ 90%
164                 MAJOR    @ 80%
165         OR
166         host=<hostname>.volumegroup=<volumegroup-name>
167             Monitor and if condition persists, consider adding additional physical volumes to the volume group.
168     Entity_Instance_ID: |-
169         host=<hostname>.filesystem=<mount-dir>
170         OR
171         host=<hostname>.volumegroup=<volumegroup-name>
172     Severity: [critical, major]
173     Proposed_Repair_Action: "Reduce usage or resize filesystem."
174     Maintenance_Action:
175         critical: degrade
176         major: degrade
177     Inhibit_Alarms:
178     Alarm_Type: operational-violation
179     Probable_Cause: threshold-crossed
180     Service_Affecting: False
181     Suppression: True
182     Management_Affecting_Severity: critical
183     Degrade_Affecting_Severity: critical
184
185 100.105:    # Filesystem missing on one controller, or sized differently between controllers
186     Type: Alarm
187     Description: |-
188         Filesystem Alarm Condition:
189         <fs_name> filesystem is not added on both controllers and/or does not have the same size: <hostname>.
190     Entity_Instance_ID: fs_name=<image-conversion>
191     Severity: critical
192     Proposed_Repair_Action: "Add image-conversion filesystem on both controllers.
193                              Consult the System Administration Manual for more details.
194                              If problem persists, contact next level of support."
195     Maintenance_Action: degrade
196     Inhibit_Alarms:
197     Alarm_Type: equipment
198     Probable_Cause: configuration-or-customization-error
199     Service_Affecting: True
200     Suppression: False
201     Management_Affecting_Severity: major
202     Degrade_Affecting_Severity: none
203
204 #--------
205 # 100.105: Original definition retired (with R2 release): previously monitored /etc/nova/instances
206 #          NFS mount from controller to computes. The ID is reused by the filesystem alarm defined above.
207 #--------
208
209 100.106:    # 'OAM' port failure alarm, raised per host port
210     Type: Alarm
211     Description: "'OAM' Port failed."
212     Entity_Instance_ID: host=<hostname>.port=<port-name>
213     Severity: major
214     Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
215     Maintenance_Action: degrade
216     Inhibit_Alarms:
217     Alarm_Type: operational-violation
218     Probable_Cause: unknown
219     Service_Affecting: True
220     Suppression: True
221     Management_Affecting_Severity: warning
222     Degrade_Affecting_Severity: major
223
224 100.107:    # 'OAM' interface degraded or failed alarm, raised per host interface
225     Type: Alarm
226     Description: |-
227         'OAM' Interface degraded.
228         OR
229         'OAM' Interface failed.
230     Entity_Instance_ID: host=<hostname>.interface=<if-name>
231     Severity: [critical, major]
232     Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
233     Maintenance_Action:
234         critical: degrade
235         major: degrade
236     Inhibit_Alarms:
237     Alarm_Type: operational-violation
238     Probable_Cause: unknown
239     Service_Affecting: True
240     Suppression: True
241     Management_Affecting_Severity: warning
242     Degrade_Affecting_Severity: major
243
244 100.108:    # 'MGMT' port failure alarm, raised per host port
245     Type: Alarm
246     Description: "'MGMT' Port failed."
247     Entity_Instance_ID: host=<hostname>.port=<port-name>
248     Severity: major
249     Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
250     Maintenance_Action: degrade
251     Inhibit_Alarms:
252     Alarm_Type: operational-violation
253     Probable_Cause: unknown
254     Service_Affecting: True
255     Suppression: True
256     Management_Affecting_Severity: warning
257     Degrade_Affecting_Severity: major
258
259 100.109:    # 'MGMT' interface degraded or failed alarm, raised per host interface
260     Type: Alarm
261     Description: |-
262         'MGMT' Interface degraded.
263         OR
264         'MGMT' Interface failed.
265     Entity_Instance_ID: host=<hostname>.interface=<if-name>
266     Severity: [critical, major]
267     Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
268     Maintenance_Action:
269         critical: degrade
270         major: degrade
271     Inhibit_Alarms:
272     Alarm_Type: operational-violation
273     Probable_Cause: unknown
274     Service_Affecting: True
275     Suppression: True
276     Management_Affecting_Severity: warning
277     Degrade_Affecting_Severity: major
278
279 100.110:    # 'CLUSTER-HOST' port failure alarm, raised per host port
280     Type: Alarm
281     Description: "'CLUSTER-HOST' Port failed."
282     Entity_Instance_ID: host=<hostname>.port=<port-name>
283     Severity: major
284     Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
285     Maintenance_Action: degrade
286     Inhibit_Alarms:
287     Alarm_Type: operational-violation
288     Probable_Cause: unknown
289     Service_Affecting: True
290     Suppression: True
291     Management_Affecting_Severity: warning
292     Degrade_Affecting_Severity: major
293
294 100.111:    # 'CLUSTER-HOST' interface degraded or failed alarm, raised per host interface
295     Type: Alarm
296     Description: |-
297         'CLUSTER-HOST' Interface degraded.
298         OR
299         'CLUSTER-HOST' Interface failed.
300     Entity_Instance_ID: host=<hostname>.interface=<if-name>
301     Severity: [critical, major]
302     Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
303     Maintenance_Action:
304         critical: degrade
305         major: degrade
306     Inhibit_Alarms:
307     Alarm_Type: operational-violation
308     Probable_Cause: unknown
309     Service_Affecting: True
310     Suppression: True
311     Management_Affecting_Severity: warning
312     Degrade_Affecting_Severity: major
313
314 100.112:    # 'DATA-VRS' port down alarm, raised per host port
315     Type: Alarm
316     Description: "'DATA-VRS' Port down."
317     Entity_Instance_ID: host=<hostname>.port=<port-name>
318     Severity: major
319     Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
320     Maintenance_Action: degrade
321     Inhibit_Alarms:
322     Alarm_Type: operational-violation
323     Probable_Cause: unknown
324     Service_Affecting: True
325     Suppression: True
326     Management_Affecting_Severity: none
327     Degrade_Affecting_Severity: major
328
329 100.113:    # 'DATA-VRS' interface degraded or down alarm, raised per host interface
330     Type: Alarm
331     Description: |-
332         'DATA-VRS' Interface degraded.
333         OR
334         'DATA-VRS' Interface down.
335     Entity_Instance_ID: host=<hostname>.interface=<if-name>
336     Severity: [critical, major]
337     Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
338     Maintenance_Action:
339         major: degrade  # NOTE(review): Severity also lists critical, but no critical action is given here - confirm
340     Inhibit_Alarms:
341     Alarm_Type: operational-violation
342     Probable_Cause: unknown
343     Service_Affecting: True
344     Suppression: True
345     Management_Affecting_Severity: none
346     Degrade_Affecting_Severity: major
347
348 100.114:    # NTP server alarm; Description and Entity_Instance_ID are dictionaries indexed by severity
349     Type: Alarm
350     Description:
351         major: "NTP configuration does not contain any valid or reachable NTP servers."
352         minor: "NTP address <IP address>  is not a valid or a reachable NTP server."
353     Entity_Instance_ID:
354         major: host=<hostname>.ntp
355         minor: host=<hostname>.ntp=<IP address>
356     Severity: [major, minor]
357     Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support."
358     Maintenance_Action: none
359     Inhibit_Alarms:
360     Alarm_Type: communication
361     Probable_Cause: unknown
362     Service_Affecting: False
363     Suppression: False
364     Management_Affecting_Severity: none
365     Degrade_Affecting_Severity: none
366
367 100.115:    # VSwitch memory usage threshold alarm, raised per host processor
368     Type: Alarm
369     Description: "VSwitch Memory Usage, processor <processor> threshold exceeded; threshold x%, actual y% ."
370     Entity_Instance_ID: host=<hostname>.processor=<processor>
371     Severity: [critical, major, minor]
372     Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support."
373     Maintenance_Action:
374         critical: degrade
375         major: degrade
376     Inhibit_Alarms:
377     Alarm_Type: operational-violation
378     Probable_Cause: threshold-crossed
379     Service_Affecting: False
380     Suppression: True
381     Management_Affecting_Severity: none
382     Degrade_Affecting_Severity: critical
383
384 100.116:    # Cinder LVM thinpool usage threshold alarm, raised per host
385     Type: Alarm
386     Description: "Cinder LVM Thinpool Usage threshold exceeded; threshold x%, actual y% ."
387     Entity_Instance_ID: host=<hostname>
388     Severity: [critical, major, minor]
389     Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support."
390     Maintenance_Action:
391         critical: degrade
392         major: degrade
393     Inhibit_Alarms:
394     Alarm_Type: operational-violation
395     Probable_Cause: threshold-crossed
396     Service_Affecting: False
397     Suppression: True
398     Management_Affecting_Severity: none
399     Degrade_Affecting_Severity: critical
400
401 100.117:    # Nova LVM thinpool usage threshold alarm, raised per host
402     Type: Alarm
403     Description: "Nova LVM Thinpool Usage threshold exceeded; threshold x%, actual y% ."
404     Entity_Instance_ID: host=<hostname>
405     Severity: [critical, major, minor]
406     Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support."
407     Maintenance_Action:
408         critical: degrade
409         major: degrade
410     Inhibit_Alarms:
411     Alarm_Type: operational-violation
412     Probable_Cause: threshold-crossed
413     Service_Affecting: False
414     Suppression: True
415     Management_Affecting_Severity: major
416     Degrade_Affecting_Severity: critical
417
418 100.118:    # Controller cannot reach the remote logging server
419     Type: Alarm
420     Description: Controller cannot establish connection with remote logging server.
421     Entity_Instance_ID: host=<hostname>
422     Severity: minor
423     Proposed_Repair_Action: "Ensure Remote Log Server IP is reachable from Controller through OAM interface; otherwise contact next level of support."
424     Maintenance_Action: none
425     Inhibit_Alarms: False
426     Alarm_Type: communication
427     Probable_Cause: communication-subsystem-failure
428     Service_Affecting: False
429     Suppression: False
430     Management_Affecting_Severity: none
431     Degrade_Affecting_Severity: none
432
433 100.119:    # PTP alarms: unsupported mode, out-of-tolerance, no lock to Grand Master, GNSS/1PPS signal loss
434     Type: Alarm
435     Description: |-
436         <hostname> does not support the provisioned PTP mode
437         OR
438         <hostname> PTP clocking is out-of-tolerance
439         OR
440         <hostname> is not locked to remote PTP Grand Master
441         OR
442         <hostname> GNSS signal loss state:<state>
443         OR
444         <hostname> 1PPS signal loss state:<state>
445     Entity_Instance_ID: |-
446         host=<hostname>.ptp
447         OR
448         host=<hostname>.ptp=no-lock
449         OR
450         host=<hostname>.ptp=<interface>.unsupported=hardware-timestamping
451         OR
452         host=<hostname>.ptp=<interface>.unsupported=software-timestamping
453         OR
454         host=<hostname>.ptp=<interface>.unsupported=legacy-timestamping
455         OR
456         host=<hostname>.ptp=out-of-tolerance
457         OR
458         host=<hostname>.instance=<instance>.ptp=out-of-tolerance
459         OR
460         host=<hostname>.interface=<interface>.ptp=signal-loss
461     Severity: [major, minor]
462     Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support."
463     Maintenance_Action: none
464     Inhibit_Alarms:
465     Alarm_Type: communication
466     Probable_Cause: unknown
467     Service_Affecting: False
468     Suppression: False
469     Management_Affecting_Severity: none
470     Degrade_Affecting_Severity: none
471
472 100.150:    # Service open-file-descriptor usage alarm; Description is a dictionary indexed by severity
473     Type: Alarm
474     Description:
475         critical: "service open file descriptor has reached its limit"
476         major: "service open file descriptor is approaching to its limit"
477     Entity_Instance_ID: |-
478         host=<hostname>.resource_type=file-descriptor.service_name=<service-name>
479     Severity: [critical, major]
480     Proposed_Repair_Action: "swact to the other controller if it is available"
481     Maintenance_Action: none
482     Inhibit_Alarms:
483     Alarm_Type: operational-violation
484     Probable_Cause: threshold-crossed
485     Service_Affecting: True
486     Suppression: False
487     Management_Affecting_Severity: critical
488     Degrade_Affecting_Severity: critical
489
490 #---------------------------------------------------------------------------
491 #   MAINTENANCE
492 #---------------------------------------------------------------------------
493
494
495 200.001:    # Host administratively locked (out-of-service); the only record here with Inhibit_Alarms: True
496     Type: Alarm
497     Description: <hostname> was administratively locked to take it out-of-service.
498     Entity_Instance_ID: host=<hostname>
499     Severity: warning
500     Proposed_Repair_Action: Administratively unlock Host to bring it back in-service.
501     Maintenance_Action: none
502     Inhibit_Alarms: True
503     Alarm_Type: operational-violation
504     Probable_Cause: out-of-service
505     Service_Affecting: True
506     Suppression: False
507     Management_Affecting_Severity: warning
508     Degrade_Affecting_Severity: none
509
510 200.004:    # Host service-affecting failure; host is auto recovered by reboot
511     Type: Alarm
512     Description: |-
513         <hostname> experienced a service-affecting failure.
514         Host is being auto recovered by Reboot.
515     Entity_Instance_ID: host=<hostname>
516     Severity: critical
517     Proposed_Repair_Action: If auto-recovery is consistently unable to recover host to the unlocked-enabled state contact next level of support or lock and replace failing host.
518     Maintenance_Action: auto recover
519     Inhibit_Alarms: False
520     Alarm_Type: operational-violation
521     Probable_Cause: application-subsystem-failure
522     Service_Affecting: True
523     Suppression: True
524     Management_Affecting_Severity: warning
525     Degrade_Affecting_Severity: none
526
527 200.011:    # Host configuration failure during initialization; host is re-configured by reboot
528     Type: Alarm
529     Description: <hostname> experienced a configuration failure during initialization. Host is being re-configured by Reboot.
530     Entity_Instance_ID: host=<hostname>
531     Severity: critical
532     Proposed_Repair_Action: If auto-recovery is consistently unable to recover host to the unlocked-enabled state contact next level of support or lock and replace failing host.
533     Maintenance_Action: auto-recover  # NOTE(review): 200.004 and 200.010 spell this 'auto recover' - confirm the canonical form
534     Inhibit_Alarms: False
535     Alarm_Type: operational-violation
536     Probable_Cause: configuration-or-customization-error
537     Service_Affecting: True
538     Suppression: True
539     Management_Affecting_Severity: warning
540     Degrade_Affecting_Severity: none
541
542 200.010:    # Host access to its board management module has failed
543     Type: Alarm
544     Description: <hostname> access to board management module has failed.
545     Entity_Instance_ID: host=<hostname>
546     Severity: warning
547     Proposed_Repair_Action: Check Host's board management configuration and connectivity.
548     Maintenance_Action: auto recover
549     Inhibit_Alarms: False
550     Alarm_Type: operational-violation
551     Probable_Cause: communication-subsystem-failure
552     Service_Affecting: False
553     Suppression: False
554     Management_Affecting_Severity: none
555     Degrade_Affecting_Severity: none
556
557 200.012:    # Controller function in-service failure while compute services remain healthy
558     Type: Alarm
559     Description: <hostname> controller function has in-service failure while compute services remain healthy.
560     Entity_Instance_ID: host=<hostname>
561     Severity: major
562     Proposed_Repair_Action: Lock and then Unlock host to recover. Avoid using 'Force Lock' action as that will impact compute services running on this host.  If lock action fails then contact next level of support to investigate and recover.
563     Maintenance_Action: "degrade - requires manual action"
564     Inhibit_Alarms: False
565     Alarm_Type: operational-violation
566     Probable_Cause: communication-subsystem-failure
567     Service_Affecting: True
568     Suppression: True
569     Management_Affecting_Severity: warning
570     Degrade_Affecting_Severity: major
571
572 200.013:    # Compute service failure on the only available controller; host is degraded instead of auto-recovered
573     Type: Alarm
574     Description: <hostname> compute service of the only available controller is not operational. Auto-recovery is disabled. Degrading host instead.
575     Entity_Instance_ID: host=<hostname>
576     Severity: major
577     Proposed_Repair_Action: Enable second controller and Switch Activity (Swact) over to it as soon as possible. Then Lock and Unlock host to recover its local compute service.
578     Maintenance_Action: "degrade - requires manual action"
579     Inhibit_Alarms: False
580     Alarm_Type: operational-violation
581     Probable_Cause: communication-subsystem-failure
582     Service_Affecting: True
583     Suppression: True
584     Management_Affecting_Severity: warning
585     Degrade_Affecting_Severity: major
586
587 200.005:    # 'Management Network' communication failure alarm (intermittent or persistent)
588     Type: Alarm
589     Description: |-
590         Degrade:
591         <hostname> is experiencing an intermittent 'Management Network'  communication failures that have exceeded its lower alarming threshold.
592
593         Failure:
594         <hostname> is experiencing a persistent critical 'Management Network' communication failure.
595     Entity_Instance_ID: host=<hostname>
596     Severity: [critical, major]
597     Proposed_Repair_Action: "Check 'Management Network' connectivity and support for multicast messaging. If problem consistently occurs after that and Host is reset, then contact next level of support or lock and replace failing host."
598     Maintenance_Action: auto recover
599     Inhibit_Alarms: False
600     Alarm_Type: communication
601     Probable_Cause: unknown
602     Service_Affecting: True
603     Suppression: True
604     Management_Affecting_Severity: warning
605     Degrade_Affecting_Severity: none
606
607 200.009:    # 'Cluster-host Network' communication failure alarm (intermittent or persistent)
608     Type: Alarm
609     Description: |-
610         Degrade:
611         <hostname> is experiencing an intermittent 'Cluster-host Network'  communication failures that have exceeded its lower alarming threshold.
612
613         Failure:
614         <hostname> is experiencing a persistent critical 'Cluster-host Network' communication failure.
615     Entity_Instance_ID: host=<hostname>
616     Severity: [critical, major]
617     Proposed_Repair_Action: "Check 'Cluster-host Network' connectivity and support for multicast messaging. If problem consistently occurs after that and Host is reset, then contact next level of support or lock and replace failing host."
618     Maintenance_Action: auto recover
619     Inhibit_Alarms: False
620     Alarm_Type: communication
621     Probable_Cause: unknown
622     Service_Affecting: True
623     Suppression: True
624     Management_Affecting_Severity: warning
625     Degrade_Affecting_Severity: none
626
627
628 200.006:    # Process Monitor (pmond) daemon failure, or failure of a monitored process
629     Type: Alarm
630     Description: |-
631         Main Process Monitor Daemon Failure (major):
632             <hostname> 'Process Monitor' (pmond) process is not running or functioning properly. The system is trying to recover this process.
633
634         Monitored Process Failure (critical/major/minor):
635             Critical: <hostname> critical '<processname>' process has failed and could not be auto-recovered gracefully.
636                       Auto-recovery progression by host reboot is required and in progress.
637             Major:    <hostname> is degraded due to the failure of its '<processname>' process. Auto recovery of this major process is in progress.
638             Minor:    <hostname> '<processname>' process has failed. Auto recovery of this minor process is in progress.
639                       OR
640                       <hostname> '<processname>' process has failed. Manual recovery is required.
641     Entity_Instance_ID: host=<hostname>.process=<processname>
642     Severity: [critical, major, minor]
643     Proposed_Repair_Action: |-
644         If this alarm does not automatically clear after some time and continues to be asserted after Host is locked and unlocked then contact next level of support for root cause analysis and recovery.
645
646         If problem consistently occurs after Host is locked and unlocked then contact next level of support for root cause analysis and recovery.
647     Maintenance_Action:
648         critical: auto-recover
649         major: degrade
650         minor:
651     Inhibit_Alarms: False
652     Alarm_Type: operational-violation
653     Probable_Cause: unknown
654     Service_Affecting:
655         critical: True
656         major: True
657         minor: False
658     Suppression: True
659     Management_Affecting_Severity: warning
660     Degrade_Affecting_Severity: major
661
662 # 200.006:      // NOTE using duplicate ID of a completely analogous Alarm for this
663 #     Type: Log
664 #     Description: |-
665 #         Main Process Monitor Daemon Failure (major)
666 #             <hostname> 'Process Monitor' (pmond) process is not running or functioning properly.
667 #             The system is trying to recover this process.
668 #
669 #         Monitored Process Failure (critical/major/minor)
670 #             critical: <hostname> critical '<processname>' process has failed and could not be auto-recovered gracefully.
671 #                       Auto-recovery progression by host reboot is required and in progress.
672 #             major:    <hostname> is degraded due to the failure of its '<processname>' process. Auto recovery of this major process is in progress.
673 #             minor:    <hostname> '<processname>' process has failed. Auto recovery of this minor process is in progress.
674 #                       OR
675 #                       <hostname> '<processname>' process has failed. Manual recovery is required.
676 #     Entity_Instance_ID: host=<hostname>.process=<process-name>
677 #     Severity: minor
678 #     Alarm_Type: other
679 #     Probable_Cause: unspecified-reason
680 #     Service_Affecting: True
681
682
683 200.007:    # Sensor out-of-tolerance alarm; Description, Maintenance_Action and Service_Affecting are indexed by severity
684     Type: Alarm
685     Description:
686         critical: "Host is degraded due to a 'critical' out-of-tolerance reading from  the '<sensorname>' sensor"
687         major: "Host is degraded due to a 'major' out-of-tolerance reading from  the '<sensorname>' sensor"
688         minor: "Host is reporting a 'minor' out-of-tolerance reading from the '<sensorname>' sensor"
689     Entity_Instance_ID: host=<hostname>.sensor=<sensorname>
690     Severity: [critical, major, minor]
691     Proposed_Repair_Action: "If problem consistently occurs after Host is power cycled and or reset, contact next level of support or lock and replace failing host."
692     Maintenance_Action:
693         critical: degrade
694         major: degrade
695         minor: auto-recover (polling)
696     Inhibit_Alarms:
697     Alarm_Type: operational-violation
698     Probable_Cause: unspecified-reason
699     Service_Affecting:
700         critical: True
701         major: False
702         minor: False
703     Suppression: True
704     Management_Affecting_Severity: none
705     Degrade_Affecting_Severity: critical  # NOTE(review): the 'major' Description and Maintenance_Action also say the host is degraded - confirm
706
707 200.014:    # Hardware Monitor could not load, configure and monitor one or more sensors
708     Type: Alarm
709     Description: "The Hardware Monitor was unable to load, configure and monitor one or more hardware sensors."
710     Entity_Instance_ID: host=<hostname>
711     Severity: minor
712     Proposed_Repair_Action: Check Board Management Controller provisioning. Try reprovisioning the BMC. If problem persists try power cycling the host and then the entire server including the BMC power. If problem persists then contact next level of support.
713     Maintenance_Action: none
714     Inhibit_Alarms: False
715     Alarm_Type: operational-violation
716     Probable_Cause: unknown
717     Service_Affecting: False
718     Suppression: True
719     Management_Affecting_Severity: none
720     Degrade_Affecting_Severity: none
721
722 200.015:    # Sensor groups could not be read from the host's board management controller
723     Type: Alarm
724     Description: Unable to read one or more sensor groups from this host's board management controller
725     Entity_Instance_ID: host=<hostname>
726     Severity: major
727     Proposed_Repair_Action: Check board management connectivity and try rebooting the board management controller. If problem persists contact next level of support or lock and replace failing host.
728     Maintenance_Action: none
729     Inhibit_Alarms: False
730     Alarm_Type: operational-violation
731     Probable_Cause: unknown
732     Service_Affecting: False
733     Suppression: False
734     Management_Affecting_Severity: none
735     Degrade_Affecting_Severity: none
736
737
738 200.020:    # Host event log; Description and Entity_Instance_ID are parallel lists of four entries each
739     Type: Log
740     Description: ["<hostname> has been 'discovered' on the network",
741                   "<hostname> has been 'added' to the system",
742                   "<hostname> has 'entered' multi-node failure avoidance",
743                   "<hostname> has 'exited' multi-node failure avoidance"]
744     Entity_Instance_ID: [host=<hostname>.event=discovered,
745                          host=<hostname>.event=add,
746                          host=<hostname>.event=mnfa_enter,
747                          host=<hostname>.event=mnfa_exit]
748     Severity: warning
749     Alarm_Type: other
750     Probable_Cause: unspecified-reason
751     Service_Affecting: True
752
753
754 200.021:
755     Type: Log
756     Description: ["<hostname> board management controller has been 'provisioned'",
757                   "<hostname> board management controller has been 're-provisioned'",
758                   "<hostname> board management controller has been 'de-provisioned'",
759                   "<hostname> manual 'unlock' request",
760                   "<hostname> manual 'reboot' request",
761                   "<hostname> manual 'reset' request",
762                   "<hostname> manual 'power-off' request",
763                   "<hostname> manual 'power-on' request",
764                   "<hostname> manual 'reinstall' request",
765                   "<hostname> manual 'force-lock' request",
766                   "<hostname> manual 'delete' request",
767                   "<hostname> manual 'controller switchover' request"]
768     Entity_Instance_ID: [host=<hostname>.command=provision,
769                          host=<hostname>.command=reprovision,
770                          host=<hostname>.command=deprovision,
771                          host=<hostname>.command=unlock,
772                          host=<hostname>.command=reboot,
773                          host=<hostname>.command=reset,
774                          host=<hostname>.command=power-off,
775                          host=<hostname>.command=power-on,
776                          host=<hostname>.command=reinstall,
777                          host=<hostname>.command=force-lock,
778                          host=<hostname>.command=delete,
779                          host=<hostname>.command=swact]
780     Severity: warning
781     Alarm_Type: other
782     Probable_Cause: unspecified-reason
783     Service_Affecting: False
784
785
786 200.022:
787     Type: Log
788     Description: ["<hostname> is now 'disabled'",
789                   "<hostname> is now 'enabled'",
790                   "<hostname> is now 'online'",
791                   "<hostname> is now 'offline'",
792                   "<hostname> is 'disabled-failed' to the system",
793                   "<hostname> reinstall failed",
794                   "<hostname> reinstall completed successfully"]
795     Entity_Instance_ID: [host=<hostname>.state=disabled,
796                          host=<hostname>.state=enabled,
797                          host=<hostname>.status=online,
798                          host=<hostname>.status=offline,
799                          host=<hostname>.status=failed,
800                          host=<hostname>.status=reinstall-failed,
801                          host=<hostname>.status=reinstall-complete]
802     Severity: warning
803     Alarm_Type: other
804     Probable_Cause: unspecified-reason
805     Service_Affecting: True
806
807
808 #---------------------------------------------------------------------------
809 #   BACKUP AND RESTORE
810 #---------------------------------------------------------------------------
811
812 210.001:
813     Type: Alarm
814     Description: System Backup in progress.
815     Entity_Instance_ID: host=controller
816     Severity: minor
817     Proposed_Repair_Action: No action required.
818     Maintenance_Action:
819     Inhibit_Alarms:
820     Alarm_Type: operational-violation
821     Probable_Cause: unspecified-reason
822     Service_Affecting: False
823     Suppression: False
824     Management_Affecting_Severity: warning
825     Degrade_Affecting_Severity: none
826
827
828 #---------------------------------------------------------------------------
829 #   SYSTEM CONFIGURATION
830 #---------------------------------------------------------------------------
831
832 250.001:
833     Type: Alarm
834     Description: <hostname> Configuration is out-of-date.
835     Entity_Instance_ID: host=<hostname>
836     Severity: major
837     Proposed_Repair_Action: Administratively lock and unlock <hostname> to update config.
838     Maintenance_Action:
839     Inhibit_Alarms:
840     Alarm_Type: operational-violation
841     Probable_Cause: unspecified-reason
842     Service_Affecting: True
843     Suppression: False
844     Management_Affecting_Severity: warning
845     Degrade_Affecting_Severity: none
846
847 250.002:
848     Type: Alarm
849     Description: <hostname> Ceph cache tiering configuration is out-of-date.
850     Entity_Instance_ID: cluster=<dist-fs-uuid>
851     Severity: major
852     Proposed_Repair_Action: Apply Ceph service parameter settings.
853     Maintenance_Action:
854     Inhibit_Alarms:
855     Alarm_Type: operational-violation
856     Probable_Cause: unspecified-reason
857     Service_Affecting: False
858     Suppression: False
859     Management_Affecting_Severity: warning
860     Degrade_Affecting_Severity: none
861
862 250.003:
863     Type: Alarm
864     Description: "Kubernetes certificates rotation failed on host[, reason = <reason_text>]"
865     Entity_Instance_ID: host=<hostname>
866     Severity: major
867     Proposed_Repair_Action: Lock and unlock the host to update services with new certificates (Manually renew kubernetes certificates first if renewal failed).
868     Maintenance_Action:
869     Inhibit_Alarms:
870     Alarm_Type: operational-violation
871     Probable_Cause: unspecified-reason
872     Service_Affecting: False
873     Suppression: False
874     Management_Affecting_Severity: warning
875     Degrade_Affecting_Severity: none
876
877 #---------------------------------------------------------------------------
878 #   Deployment Manager Monitor
879 #---------------------------------------------------------------------------
880 260.001:
881     Type: Alarm
882     Description: "Deployment Manager resource not reconciled: <name>"
883     Entity_Instance_ID: resource=<crd-resource>,name=<resource-name>
884     Severity: major
885     Proposed_Repair_Action: Monitor and if condition persists, validate deployment configuration.
886     Maintenance_Action:
887     Inhibit_Alarms:
888     Alarm_Type: operational-violation
889     Probable_Cause: configuration-out-of-date
890     Service_Affecting: True
891     Suppression: True
892     Management_Affecting_Severity: warning
893     Degrade_Affecting_Severity: none
894
895 #---------------------------------------------------------------------------
896 #   VM Compute Services
897 #---------------------------------------------------------------------------
898 270.001:
899     Type: Alarm
900     Description: "Host <host_name> compute services failure[, reason = <reason_text>]"
901     Entity_Instance_ID: host=<host_name>.services=compute
902     Severity: critical
903     Proposed_Repair_Action: Wait for host services recovery to complete; if problem persists contact next level of support
904     Maintenance_Action:
905     Inhibit_Alarms:
906     Alarm_Type: processing-error
907     Probable_Cause: unspecified-reason
908     Service_Affecting: True
909     Suppression: True
910     Management_Affecting_Severity: warning
911     Degrade_Affecting_Severity: none
912
913 270.101:
914     Type: Log
915     Description: "Host <host_name> compute services failure[, reason = <reason_text>]"
916     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
917     Severity: critical
918     Alarm_Type: equipment
919     Probable_Cause: unspecified-reason
920     Service_Affecting: False
921
922 270.102:
923     Type: Log
924     Description: Host <host_name> compute services enabled
925     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
926     Severity: critical
927     Alarm_Type: equipment
928     Probable_Cause: unspecified-reason
929     Service_Affecting: False
930
931 270.103:
932     Type: Log
933     Description: Host <host_name> compute services disabled
934     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
935     Severity: critical
936     Alarm_Type: equipment
937     Probable_Cause: unspecified-reason
938     Service_Affecting: False
939
940
941 275.001:
942     Type: Log
943     Description: Host <host_name> hypervisor is now <administrative_state>-<operational_state>
944     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
945     Severity: critical
946     Alarm_Type: equipment
947     Probable_Cause: unspecified-reason
948     Service_Affecting: False
949
950
951 #---------------------------------------------------------------------------
952 #   DISTRIBUTED CLOUD
953 #---------------------------------------------------------------------------
954
955 280.001:
956     Type: Alarm
957     Description: <subcloud> is offline
958     Entity_Instance_ID: subcloud=<subcloud>
959     Severity: critical
960     Proposed_Repair_Action: Wait for subcloud to become online; if problem persists contact next level of support
961     Maintenance_Action:
962     Inhibit_Alarms:
963     Alarm_Type: communication
964     Probable_Cause: loss-of-signal
965     Service_Affecting: False
966     Suppression: False
967     Management_Affecting_Severity: none
968     Degrade_Affecting_Severity: none
969
970 280.002:
971     Type: Alarm
972     Description: <subcloud> <resource> sync_status is out-of-sync
973     Entity_Instance_ID: [subcloud=<subcloud>.resource=<compute | network | platform | volumev2>]
974     Severity: major
975     Proposed_Repair_Action: If problem persists contact next level of support
976     Maintenance_Action:
977     Inhibit_Alarms:
978     Alarm_Type: other
979     Probable_Cause: application-subsystem-failure
980     Service_Affecting: False
981     Suppression: False
982     Management_Affecting_Severity: none
983     Degrade_Affecting_Severity: none
984
985 280.003:
986     Type: Alarm
987     Description: Subcloud Backup Failure
988     Entity_Instance_ID: subcloud=<subcloud>
989     Severity: minor
990     Proposed_Repair_Action: Retry subcloud backup after checking backup input file. If problem persists contact next level of support.
991     Maintenance_Action:
992     Inhibit_Alarms:
993     Alarm_Type: processing-error
994     Probable_Cause: unknown
995     Service_Affecting: False
996     Suppression: False
997     Management_Affecting_Severity: none
998     Degrade_Affecting_Severity: none
999
1000 #---------------------------------------------------------------------------
1001 #   NETWORK
1002 #---------------------------------------------------------------------------
1003
1004 300.001:
1005     Type: Alarm
1006     Description: "'Data' Port failed."
1007     Entity_Instance_ID: host=<hostname>.port=<port-uuid>
1008     Severity: major
1009     Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
1010     Maintenance_Action:
1011     Inhibit_Alarms:
1012     Alarm_Type: equipment
1013     Probable_Cause: loss-of-signal
1014     Service_Affecting: True
1015     Suppression: False
1016     Management_Affecting_Severity: warning
1017     Degrade_Affecting_Severity: none
1018
1019
1020 300.002:
1021     Type: Alarm
1022     Description: |-
1023         'Data' Interface degraded.
1024         OR
1025         'Data' Interface failed.
1026     Entity_Instance_ID: host=<hostname>.interface=<if-uuid>
1027     Severity: [critical, major]
1028     Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
1029     Maintenance_Action:
1030     Inhibit_Alarms:
1031     Alarm_Type: equipment
1032     Probable_Cause: loss-of-signal
1033     Service_Affecting: True
1034     Suppression: False
1035     Management_Affecting_Severity: warning
1036     Degrade_Affecting_Severity: critical
1037
1038
1039 300.003:
1040     Type: Alarm
1041     Description: Networking Agent not responding.
1042     Entity_Instance_ID: host=<hostname>.agent=<agent-uuid>
1043     Severity: major
1044     Proposed_Repair_Action: "If condition persists, attempt to clear issue by administratively locking and unlocking the Host."
1045     Maintenance_Action:
1046     Inhibit_Alarms:
1047     Alarm_Type: operational-violation
1048     Probable_Cause: underlying-resource-unavailable
1049     Service_Affecting: True
1050     Suppression: False
1051     Management_Affecting_Severity: warning
1052     Degrade_Affecting_Severity: none
1053
1054
1055 300.004:
1056     Type: Alarm
1057     Description: No enabled compute host with connectivity to provider network.
1058     Entity_Instance_ID: service=networking.providernet=<pnet-uuid>
1059     Severity: major
1060     Proposed_Repair_Action: Enable compute hosts with required provider network connectivity.
1061     Maintenance_Action:
1062     Inhibit_Alarms:
1063     Alarm_Type: operational-violation
1064     Probable_Cause: underlying-resource-unavailable
1065     Service_Affecting: True
1066     Suppression: False
1067     Management_Affecting_Severity: warning
1068     Degrade_Affecting_Severity: none
1069
1070
1071 300.005:
1072     Type: Alarm
1073     Description: |-
1074         Communication failure detected over provider network x% for ranges y% on host z%.
1075         OR
1076         Communication failure detected over provider network x% on host z%.
1077     Entity_Instance_ID: host=<hostname>.service=networking.providernet=<pnet-uuid>
1078     Severity: major
1079     Proposed_Repair_Action: Check neighbour switch port VLAN assignments.
1080     Maintenance_Action:
1081     Inhibit_Alarms:
1082     Alarm_Type: operational-violation
1083     Probable_Cause: underlying-resource-unavailable
1084     Service_Affecting: True
1085     Suppression: False
1086     Management_Affecting_Severity: warning
1087     Degrade_Affecting_Severity: none
1088
1089
1090 300.010:
1091     Type: Alarm
1092     Description: |-
1093         ML2 Driver Agent non-reachable
1094         OR
1095         ML2 Driver Agent reachable but non-responsive
1096         OR
1097         ML2 Driver Agent authentication failure
1098         OR
1099         ML2 Driver Agent is unable to sync Neutron database
1100     Entity_Instance_ID: host=<hostname>.ml2driver=<driver>
1101     Severity: major
1102     Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support."
1103     Maintenance_Action:
1104     Inhibit_Alarms:
1105     Alarm_Type: processing-error
1106     Probable_Cause: underlying-resource-unavailable
1107     Service_Affecting: True
1108     Suppression: True
1109     Management_Affecting_Severity: warning
1110     Degrade_Affecting_Severity: none
1111
1112
1113 300.012:
1114     Type: Alarm
1115     Description: "Openflow Controller connection failed."
1116     Entity_Instance_ID: host=<hostname>.openflow-controller=<uri>
1117     Severity: major
1118     Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
1119     Maintenance_Action:
1120     Inhibit_Alarms:
1121     Alarm_Type: equipment
1122     Probable_Cause: loss-of-signal
1123     Service_Affecting: True
1124     Suppression: False
1125     Management_Affecting_Severity: warning
1126     Degrade_Affecting_Severity: critical
1127
1128
1129 300.013:
1130     Type: Alarm
1131     Description: |-
1132         No active Openflow controller connections found for this network.
1133         OR
1134         One or more Openflow controller connections in disconnected state for this network.
1135     Entity_Instance_ID: host=<hostname>.openflow-network=<name>
1136     Severity: [critical, major]
1137     Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
1138     Maintenance_Action:
1139     Inhibit_Alarms:
1140     Alarm_Type: equipment
1141     Probable_Cause: loss-of-signal
1142     Service_Affecting: True
1143     Suppression: False
1144     Management_Affecting_Severity: warning
1145     Degrade_Affecting_Severity: critical
1146
1147
1148 300.014:
1149     Type: Alarm
1150     Description: "OVSDB Manager connection failed."
1151     Entity_Instance_ID: host=<hostname>.sdn-controller=<uuid>
1152     Severity: major
1153     Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
1154     Maintenance_Action:
1155     Inhibit_Alarms:
1156     Alarm_Type: equipment
1157     Probable_Cause: loss-of-signal
1158     Service_Affecting: True
1159     Suppression: False
1160     Management_Affecting_Severity: warning
1161     Degrade_Affecting_Severity: critical
1162
1163
1164 300.015:
1165     Type: Alarm
1166     Description: "No active OVSDB connections found."
1167     Entity_Instance_ID: host=<hostname>
1168     Severity: critical
1169     Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
1170     Maintenance_Action:
1171     Inhibit_Alarms:
1172     Alarm_Type: equipment
1173     Probable_Cause: loss-of-signal
1174     Service_Affecting: True
1175     Suppression: False
1176     Management_Affecting_Severity: warning
1177     Degrade_Affecting_Severity: critical
1178
1179 300.016:
1180     Type: Alarm
1181     Description: "Dynamic routing agent x% lost connectivity to peer y%."
1182     Entity_Instance_ID: host=<hostname>,agent=<agent-uuid>,bgp-peer=<bgp-peer>
1183     Severity: major
1184     Proposed_Repair_Action: If condition persists, fix connectivity to peer.
1185     Maintenance_Action:
1186     Inhibit_Alarms:
1187     Alarm_Type: operational-violation
1188     Probable_Cause: loss-of-signal
1189     Service_Affecting: True
1190     Suppression: True
1191     Management_Affecting_Severity: warning
1192     Degrade_Affecting_Severity: none
1193
1194
1195 #---------------------------------------------------------------------------
1196 #   HIGH AVAILABILITY
1197 #---------------------------------------------------------------------------
1198
1199 400.001:
1200     Type: Alarm
1201     Description: |-
1202         Service group failure; <list of affected services>.
1203         OR
1204         Service group degraded; <list of affected services>.
1205         OR
1206         Service group warning; <list of affected services>.
1207     Entity_Instance_ID: service_domain=<domain_name>.service_group=<group_name>.host=<hostname>
1208     Severity: [critical, major, minor]
1209     Proposed_Repair_Action: Contact next level of support.
1210     Maintenance_Action:
1211     Inhibit_Alarms: False
1212     Alarm_Type: processing-error
1213     Probable_Cause: underlying-resource-unavailable
1214     Service_Affecting: True
1215     Suppression: True
1216     Management_Affecting_Severity: warning
1217     Degrade_Affecting_Severity: major
1218
1219
1220 400.002:
1221     Type: Alarm
1222     Description: |-
1223         Service group loss of redundancy; expected <num> standby member<s> but no standby members available.
1224         OR
1225         Service group loss of redundancy; expected <num> standby member<s> but only <num> standby member<s> available.
1226         OR
1227         Service group loss of redundancy; expected <num> active member<s> but no active members available.
1228         OR
1229         Service group loss of redundancy; expected <num> active member<s> but only <num> active member<s> available.
1230     Entity_Instance_ID: service_domain=<domain_name>.service_group=<group_name>
1231     Severity: major
1232     Proposed_Repair_Action: "Bring a controller node back in to service, otherwise contact next level of support."
1233     Maintenance_Action:
1234     Inhibit_Alarms: False
1235     Alarm_Type: processing-error
1236     Probable_Cause: underlying-resource-unavailable
1237     Service_Affecting: True
1238     Suppression: True
1239     Management_Affecting_Severity: warning
1240     Degrade_Affecting_Severity: none
1241
1242
1243 400.003:
1244     Type: Alarm
1245     Description: |-
1246         License key is not installed; a valid license key is required for operation.
1247         OR
1248         License key has expired or is invalid; a valid license key is required for operation.
1249         OR
1250         Evaluation license key will expire on <date>; there are <num_days> days remaining in this evaluation.
1251         OR
1252         Evaluation license key will expire on <date>; there is only 1 day remaining in this evaluation.
1253     Entity_Instance_ID: host=<hostname>
1254     Severity: critical
1255     Proposed_Repair_Action: Contact next level of support to obtain a new license key.
1256     Maintenance_Action:
1257     Inhibit_Alarms: False
1258     Alarm_Type: processing-error
1259     Probable_Cause: key-expired
1260     Service_Affecting: True
1261     Suppression: False
1262     Management_Affecting_Severity: critical
1263     Degrade_Affecting_Severity: none
1264
1265
1266 # 400.004:    // NOTE Removed
1267 #     Type: Alarm
1268 #     Description: Service group software modification detected; <list of affected files>.
1269 #     Entity_Instance_ID: host=<hostname>
1270 #     Severity: major
1271 #     Proposed_Repair_Action: Contact next level of support.
1272 #     Maintenance_Action:
1273 #     Inhibit_Alarms: False
1274 #     Alarm_Type: processing-error
1275 #     Probable_Cause: software-program-error
1276 #     Service_Affecting: True
1277 #     Suppression: False
1278
1279
1280 400.005:
1281     Type: Alarm
1282     Description: |-
1283         Communication failure detected with peer over port <linux-ifname>.
1284         OR
1285         Communication failure detected with peer over port <linux-ifname> within the last 30 seconds.
1286     Entity_Instance_ID: host=<hostname>.network=<mgmt | oam | cluster-host>
1287     Severity: major
1288     Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
1289     Maintenance_Action:
1290     Inhibit_Alarms: False
1291     Alarm_Type: communication
1292     Probable_Cause: underlying-resource-unavailable
1293     Service_Affecting: True
1294     Suppression: True
1295     Management_Affecting_Severity: warning
1296     Degrade_Affecting_Severity: none
1297
1298
1299 #---------------------------------------------------------------------------
1300 #   SM
1301 #---------------------------------------------------------------------------
1302
1303 401.001:
1304     Type: Log
1305     Description: Service group <group> state change from <state> to <state> on host <host_name>
1306     Entity_Instance_ID: service_domain=<domain>.service_group=<group>.host=<host_name>
1307     Severity: critical
1308     Alarm_Type: processing-error
1309     Probable_Cause: unspecified-reason
1310     Service_Affecting: True
1311
1312 401.002:
1313     Type: Log
1314     Description: |-
1315         Service group <group> loss of redundancy; expected <X> standby member but no standby members available
1316         or
1317         Service group <group> loss of redundancy; expected <X> standby member but only <Y> standby member(s) available
1318         or
1319         Service group <group> has no active members available; expected <X> active member(s)
1320         or
1321         Service group <group> loss of redundancy; expected <X> active member(s) but only <Y> active member(s) available
1322     Entity_Instance_ID: service_domain=<domain>.service_group=<group>
1323     Severity: critical
1324     Alarm_Type: processing-error
1325     Probable_Cause: unspecified-reason
1326     Service_Affecting: True
1327
1328 401.003:
1329     Type: Log
1330     Description: |-
1331         License key has expired or is invalid
1332         or
1333         Evaluation license key will expire on <date>
1334         or
1335         License key is valid
1336     Entity_Instance_ID: host=<host_name>
1337     Severity: critical
1338     Alarm_Type: processing-error
1339     Probable_Cause: unspecified-reason
1340     Service_Affecting: True
1341
1342 401.005:
1343     Type: Log
1344     Description: |-
1345         Communication failure detected with peer over port <port> on host <host_name>
1346         or
1347         Communication failure detected with peer over port <port> on host <host_name> within the last <X> seconds
1348         or
1349         Communication established with peer over port <port> on host <host_name>
1350     Entity_Instance_ID: host=<host_name>.network=<network>
1351     Severity: critical
1352     Alarm_Type: processing-error
1353     Probable_Cause: unspecified-reason
1354     Service_Affecting: True
1355
1356 401.007:
1357     Type: Log
1358     Description: Swact or swact-force
1359     Entity_Instance_ID: host=<host_name>
1360     Severity: critical
1361     Alarm_Type: processing-error
1362     Probable_Cause: unspecified-reason
1363     Service_Affecting: True
1364
1365
1366 #---------------------------------------------------------------------------
1367 #   SECURITY
1368 #---------------------------------------------------------------------------
1369
1370 500.100:
1371     Type: Alarm
1372     Description: TPM initialization failed on host.
1373     Entity_Instance_ID: host=<hostname>
1374     Severity: major
1375     Proposed_Repair_Action: Reinstall HTTPS certificate; if problem persists contact next level of support.
1376     Maintenance_Action: degrade
1377     Inhibit_Alarms:
1378     Alarm_Type: equipment
1379     Probable_Cause: procedural-error
1380     Service_Affecting: True
1381     Suppression: False
1382     Management_Affecting_Severity: none
1383     Degrade_Affecting_Severity: none
1384
1385 500.101:
1386     Type: Alarm
1387     Description: Developer patch certificate enabled.
1388     Entity_Instance_ID: host=controller
1389     Severity: critical
1390     Proposed_Repair_Action: Reinstall system to disable developer certificate and remove untrusted patches.
1391     Maintenance_Action:
1392     Inhibit_Alarms:
1393     Alarm_Type: operational-violation
1394     Probable_Cause: unspecified-reason
1395     Service_Affecting: False
1396     Suppression: False
1397     Management_Affecting_Severity: none
1398     Degrade_Affecting_Severity: none
1399
1400 500.200:
1401     Type: Alarm
1402     Description: |-
1403         Certificate 'system certificate-show <uuid>' (mode=<ssl/ssl_ca/docker_registry/openstack/openstack_ca>) expiring soon on <date>.
1404         OR
1405         Certificate '<Namespace>/<Certificate/Secret>' expiring soon on <date>.
1406         OR
1407         Certificate '<k8sRootCA/EtcdCA>' expiring soon on <date>.
1408     Entity_Instance_ID: |-
1409         system.certificate.mode=<mode>.uuid=<uuid>
1410         OR
1411         namespace=<namespace-name>.certificate=<certificate-name>
1412         OR
1413         namespace=<namespace-name>.secret=<secret-name>
1414         OR
1415         system.certificate.k8sRootCA
1416     Severity: major
1417     Proposed_Repair_Action: Check certificate expiration time. Renew certificate for the entity identified.
1418     Maintenance_Action:
1419     Inhibit_Alarms:
1420     Alarm_Type: operational-violation
1421     Probable_Cause: certificate-expiration
1422     Service_Affecting: False
1423     Suppression: False
1424     Management_Affecting_Severity: none
1425     Degrade_Affecting_Severity: none
1426
1427 500.210:
1428     Type: Alarm
1429     Description: |-
1430         Certificate 'system certificate-show <uuid>' (mode=<ssl/ssl_ca/docker_registry/openstack/openstack_ca>) expired.
1431         OR
1432         Certificate '<Namespace>/<Certificate/Secret>' expired.
1433         OR
1434         Certificate '<k8sRootCA/EtcdRootCA>' expired.
1435     Entity_Instance_ID: |-
1436         system.certificate.mode=<mode>.uuid=<uuid>
1437         OR
1438         namespace=<namespace-name>.certificate=<certificate-name>
1439         OR
1440         namespace=<namespace-name>.secret=<secret-name>
1441         OR
1442         system.certificate.k8sRootCA
1443     Severity: critical
1444     Proposed_Repair_Action: Check certificate expiration time. Renew certificate for the entity identified.
1445     Maintenance_Action:
1446     Inhibit_Alarms:
1447     Alarm_Type: operational-violation
1448     Probable_Cause: certificate-expiration
1449     Service_Affecting: False
1450     Suppression: False
1451     Management_Affecting_Severity: none
1452     Degrade_Affecting_Severity: none
1453
1454 500.500:
1455     Type: Log
1456     Description: "Host <host_name> has IMA Appraisal failure for service <service> when executing <file>[, reason = <reason_text>]"
1457     Entity_Instance_ID: host=<hostname>.service=<service>
1458     Severity: major
1459     Alarm_Type: integrity-violation
1460     Probable_Cause: information-modification-detected
1461     Service_Affecting: False
1462
1463
1464 #---------------------------------------------------------------------------
1465 #   VM
1466 #---------------------------------------------------------------------------
1467
1468 700.001:
1469     Type: Alarm
1470     Description: |-
1471         Instance <instance_name> owned by <tenant_name> has failed on host <host_name>
1472         Instance <instance_name> owned by <tenant_name> has failed to schedule
1473     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1474     Severity: critical
1475     Proposed_Repair_Action: The system will attempt recovery; no repair action required
1476     Maintenance_Action:
1477     Inhibit_Alarms:
1478     Alarm_Type: processing-error
1479     Probable_Cause: software-error
1480     Service_Affecting: True
1481     Suppression: True
1482     Management_Affecting_Severity: warning
1483     Degrade_Affecting_Severity: none
1484
1485 700.002:
1486     Type: Alarm
1487     Description: Instance <instance_name> owned by <tenant_name> is paused on host <host_name>
1488     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1489     Severity: critical
1490     Proposed_Repair_Action: Unpause the instance
1491     Maintenance_Action:
1492     Inhibit_Alarms:
1493     Alarm_Type: processing-error
1494     Probable_Cause: procedural-error
1495     Service_Affecting: True
1496     Suppression: True
1497     Management_Affecting_Severity: warning
1498     Degrade_Affecting_Severity: none
1499
1500 700.003:
1501     Type: Alarm
1502     Description: Instance <instance_name> owned by <tenant_name> is suspended on host <host_name>
1503     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1504     Severity: critical
1505     Proposed_Repair_Action: Resume the instance
1506     Maintenance_Action:
1507     Inhibit_Alarms:
1508     Alarm_Type: processing-error
1509     Probable_Cause: procedural-error
1510     Service_Affecting: True
1511     Suppression: True
1512     Management_Affecting_Severity: warning
1513     Degrade_Affecting_Severity: none
1514
1515 700.004:
1516     Type: Alarm
1517     Description: Instance <instance_name> owned by <tenant_name> is stopped on host <host_name>
1518     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1519     Severity: critical
1520     Proposed_Repair_Action: Start the instance
1521     Maintenance_Action:
1522     Inhibit_Alarms:
1523     Alarm_Type: processing-error
1524     Probable_Cause: procedural-error
1525     Service_Affecting: True
1526     Suppression: True
1527     Management_Affecting_Severity: warning
1528     Degrade_Affecting_Severity: none
1529
1530 700.005:
1531     Type: Alarm
1532     Description: Instance <instance_name> owned by <tenant_name> is rebooting on host <host_name>
1533     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1534     Severity: critical
1535     Proposed_Repair_Action: Wait for reboot to complete; if problem persists contact next level of support
1536     Maintenance_Action:
1537     Inhibit_Alarms:
1538     Alarm_Type: processing-error
1539     Probable_Cause: unspecified-reason
1540     Service_Affecting: True
1541     Suppression: True
1542     Management_Affecting_Severity: warning
1543     Degrade_Affecting_Severity: none
1544
1545 700.006:
1546     Type: Alarm
1547     Description: Instance <instance_name> owned by <tenant_name> is rebuilding on host <host_name>
1548     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1549     Severity: critical
1550     Proposed_Repair_Action: Wait for rebuild to complete; if problem persists contact next level of support
1551     Maintenance_Action:
1552     Inhibit_Alarms:
1553     Alarm_Type: processing-error
1554     Probable_Cause: underlying-resource-unavailable
1555     Service_Affecting: True
1556     Suppression: True
1557     Management_Affecting_Severity: warning
1558     Degrade_Affecting_Severity: none
1559
1560 700.007:
1561     Type: Alarm
1562     Description: Instance <instance_name> owned by <tenant_name> is evacuating from host <host_name>
1563     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1564     Severity: critical
1565     Proposed_Repair_Action: Wait for evacuate to complete; if problem persists contact next level of support
1566     Maintenance_Action:
1567     Inhibit_Alarms:
1568     Alarm_Type: processing-error
1569     Probable_Cause: underlying-resource-unavailable
1570     Service_Affecting: True
1571     Suppression: True
1572     Management_Affecting_Severity: warning
1573     Degrade_Affecting_Severity: none
1574
1575 700.008:
1576     Type: Alarm
1577     Description: Instance <instance_name> owned by <tenant_name> is live migrating from host <host_name>
1578     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1579     Severity: warning
1580     Proposed_Repair_Action: Wait for live migration to complete; if problem persists contact next level of support
1581     Maintenance_Action:
1582     Inhibit_Alarms:
1583     Alarm_Type: processing-error
1584     Probable_Cause: unspecified-reason
1585     Service_Affecting: True
1586     Suppression: True
1587     Management_Affecting_Severity: warning
1588     Degrade_Affecting_Severity: none
1589
1590 700.009:
1591     Type: Alarm
1592     Description: Instance <instance_name> owned by <tenant_name> is cold migrating from host <host_name>
1593     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1594     Severity: critical
1595     Proposed_Repair_Action: Wait for cold migration to complete; if problem persists contact next level of support
1596     Maintenance_Action:
1597     Inhibit_Alarms:
1598     Alarm_Type: processing-error
1599     Probable_Cause: unspecified-reason
1600     Service_Affecting: True
1601     Suppression: True
1602     Management_Affecting_Severity: warning
1603     Degrade_Affecting_Severity: none
1604
1605 700.010:
1606     Type: Alarm
1607     Description: Instance <instance_name> owned by <tenant_name> has been cold-migrated to host <host_name> waiting for confirmation
1608     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1609     Severity: critical
1610     Proposed_Repair_Action: Confirm or revert cold-migrate of instance
1611     Maintenance_Action:
1612     Inhibit_Alarms:
1613     Alarm_Type: processing-error
1614     Probable_Cause: unspecified-reason
1615     Service_Affecting: True
1616     Suppression: True
1617     Management_Affecting_Severity: warning
1618     Degrade_Affecting_Severity: none
1619
1620 700.011:
1621     Type: Alarm
1622     Description: Instance <instance_name> owned by <tenant_name> is reverting cold migrate to host <host_name>
1623     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1624     Severity: critical
1625     Proposed_Repair_Action: "Wait for cold migration revert to complete; if problem persists contact next level of support"
1626     Maintenance_Action:
1627     Inhibit_Alarms:
1628     Alarm_Type: other
1629     Probable_Cause: unspecified-reason
1630     Service_Affecting: True
1631     Suppression: True
1632     Management_Affecting_Severity: warning
1633     Degrade_Affecting_Severity: none
1634
1635 700.012:
1636     Type: Alarm
1637     Description: Instance <instance_name> owned by <tenant_name> is resizing on host <host_name>
1638     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1639     Severity: critical
1640     Proposed_Repair_Action: Wait for resize to complete; if problem persists contact next level of support
1641     Maintenance_Action:
1642     Inhibit_Alarms:
1643     Alarm_Type: processing-error
1644     Probable_Cause: unspecified-reason
1645     Service_Affecting: True
1646     Suppression: True
1647     Management_Affecting_Severity: warning
1648     Degrade_Affecting_Severity: none
1649
1650 700.013:
1651     Type: Alarm
1652     Description: Instance <instance_name> owned by <tenant_name> has been resized on host <host_name> waiting for confirmation
1653     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1654     Severity: critical
1655     Proposed_Repair_Action: Confirm or revert resize of instance
1656     Maintenance_Action:
1657     Inhibit_Alarms:
1658     Alarm_Type: processing-error
1659     Probable_Cause: unspecified-reason
1660     Service_Affecting: True
1661     Suppression: True
1662     Management_Affecting_Severity: warning
1663     Degrade_Affecting_Severity: none
1664
1665 700.014:
1666     Type: Alarm
1667     Description: Instance <instance_name> owned by <tenant_name> is reverting resize on host <host_name>
1668     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1669     Severity: critical
1670     Proposed_Repair_Action: "Wait for resize revert to complete; if problem persists contact next level of support"
1671     Maintenance_Action:
1672     Inhibit_Alarms:
1673     Alarm_Type: other
1674     Probable_Cause: unspecified-reason
1675     Service_Affecting: True
1676     Suppression: True
1677     Management_Affecting_Severity: warning
1678     Degrade_Affecting_Severity: none
1679
1680 700.015:
1681     Type: Alarm
1682     Description: Guest Heartbeat not established for instance <instance_name> owned by <tenant_name> on host <host_name>
1683     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1684     Severity: major
1685     Proposed_Repair_Action: "Verify that the instance is running the Guest-Client daemon, or disable Guest Heartbeat for the instance if no longer needed, otherwise contact next level of support"
1686     Maintenance_Action:
1687     Inhibit_Alarms:
1688     Alarm_Type: communication
1689     Probable_Cause: procedural-error
1690     Service_Affecting: True
1691     Suppression: True
1692     Management_Affecting_Severity: warning
1693     Degrade_Affecting_Severity: none
1694
1695 700.016:
1696     Type: Alarm
1697     Description: Multi-Node Recovery Mode
1698     Entity_Instance_ID: subsystem=vim
1699     Severity: minor
1700     Proposed_Repair_Action: "Wait for the system to exit out of this mode"
1701     Maintenance_Action:
1702     Inhibit_Alarms:
1703     Alarm_Type: equipment
1704     Probable_Cause: unspecified-reason
1705     Service_Affecting: True
1706     Suppression: True
1707     Management_Affecting_Severity: warning
1708     Degrade_Affecting_Severity: none
1709
1710 700.017:
1711     Type: Alarm
1712     Description: Server group <server_group_name> <policy> policy was not satisfied
1713     Entity_Instance_ID: server-group=<server-group-uuid>
1714     Severity: minor
1715     Proposed_Repair_Action: "Migrate instances in an attempt to satisfy the policy; if problem persists contact next level of support"
1716     Maintenance_Action:
1717     Inhibit_Alarms:
1718     Alarm_Type: processing-error
1719     Probable_Cause: procedural-error
1720     Service_Affecting: True
1721     Suppression: True
1722     Management_Affecting_Severity: none
1723     Degrade_Affecting_Severity: none
1724
1725
1726 700.101:
1727     Type: Log
1728     Description: Instance <instance_name> is enabled on host <host_name>
1729     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1730     Severity: critical
1731     Alarm_Type: equipment
1732     Probable_Cause: unspecified-reason
1733     Service_Affecting: False
1734
1735 700.102:
1736     Type: Log
1737     Description: ["Instance <instance_name> owned by <tenant_name> has failed[, reason = <reason_text>]",
1738                   "Instance <instance_name> owned by <tenant_name> has failed to schedule[, reason = <reason_text>]"]
1739     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1740     Severity: critical
1741     Alarm_Type: equipment
1742     Probable_Cause: unspecified-reason
1743     Service_Affecting: False
1744
1745 700.103:
1746     Type: Log
1747     Description: Create issued <by <tenant_name>|by the system> against <instance_name> owned by <tenant_name>
1748     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1749     Severity: critical
1750     Alarm_Type: equipment
1751     Probable_Cause: unspecified-reason
1752     Service_Affecting: False
1753
1754 700.104:
1755     Type: Log
1756     Description: Creating instance <instance_name> owned by <tenant_name>
1757     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1758     Severity: critical
1759     Alarm_Type: equipment
1760     Probable_Cause: unspecified-reason
1761     Service_Affecting: False
1762
1763 700.105:
1764     Type: Log
1765     Description: "Create rejected for instance <instance_name>[, reason = <reason_text>]"
1766     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1767     Severity: critical
1768     Alarm_Type: equipment
1769     Probable_Cause: unspecified-reason
1770     Service_Affecting: False
1771
1772 700.106:
1773     Type: Log
1774     Description: "Create cancelled for instance <instance_name>[, reason = <reason_text>]"
1775     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1776     Severity: critical
1777     Alarm_Type: equipment
1778     Probable_Cause: unspecified-reason
1779     Service_Affecting: False
1780
1781 700.107:
1782     Type: Log
1783     Description: "Create failed for instance <instance_name>[, reason = <reason_text>]"
1784     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1785     Severity: critical
1786     Alarm_Type: equipment
1787     Probable_Cause: unspecified-reason
1788     Service_Affecting: False
1789
1790 700.108:
1791     Type: Log
1792     Description: Instance <instance_name> owned by <tenant_name> has been created
1793     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1794     Severity: critical
1795     Alarm_Type: equipment
1796     Probable_Cause: unspecified-reason
1797     Service_Affecting: False
1798
1799 700.109:
1800     Type: Log
1801     Description: "Delete issued <by <tenant_name>|by the system>  against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
1802     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1803     Severity: critical
1804     Alarm_Type: equipment
1805     Probable_Cause: unspecified-reason
1806     Service_Affecting: False
1807
1808 700.110:
1809     Type: Log
1810     Description: Deleting instance <instance_name> owned by <tenant_name>
1811     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1812     Severity: critical
1813     Alarm_Type: equipment
1814     Probable_Cause: unspecified-reason
1815     Service_Affecting: False
1816
1817 700.111:
1818     Type: Log
1819     Description: "Delete rejected for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
1820     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1821     Severity: critical
1822     Alarm_Type: equipment
1823     Probable_Cause: unspecified-reason
1824     Service_Affecting: False
1825
1826 700.112:
1827     Type: Log
1828     Description: "Delete cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
1829     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1830     Severity: critical
1831     Alarm_Type: equipment
1832     Probable_Cause: unspecified-reason
1833     Service_Affecting: False
1834
1835 700.113:
1836     Type: Log
1837     Description: "Delete failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
1838     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1839     Severity: critical
1840     Alarm_Type: equipment
1841     Probable_Cause: unspecified-reason
1842     Service_Affecting: False
1843
1844 700.114:
1845     Type: Log
1846     Description: Deleted instance <instance_name> owned by <tenant_name>
1847     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1848     Severity: critical
1849     Alarm_Type: equipment
1850     Probable_Cause: unspecified-reason
1851     Service_Affecting: False
1852
1853 700.115:
1854     Type: Log
1855     Description: "Pause issued <by <tenant_name>|by the system>  against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
1856     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1857     Severity: critical
1858     Alarm_Type: equipment
1859     Probable_Cause: unspecified-reason
1860     Service_Affecting: False
1861
1862 700.116:
1863     Type: Log
1864     Description: Pause inprogress for instance <instance_name> on host <host_name>
1865     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1866     Severity: critical
1867     Alarm_Type: equipment
1868     Probable_Cause: unspecified-reason
1869     Service_Affecting: False
1870
1871 700.117:
1872     Type: Log
1873     Description: "Pause rejected for instance <instance_name> enabled on host <host_name>[, reason = <reason_text>]"
1874     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1875     Severity: critical
1876     Alarm_Type: equipment
1877     Probable_Cause: unspecified-reason
1878     Service_Affecting: False
1879
1880 700.118:
1881     Type: Log
1882     Description: "Pause cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
1883     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1884     Severity: critical
1885     Alarm_Type: equipment
1886     Probable_Cause: unspecified-reason
1887     Service_Affecting: False
1888
1889 700.119:
1890     Type: Log
1891     Description: "Pause failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
1892     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1893     Severity: critical
1894     Alarm_Type: equipment
1895     Probable_Cause: unspecified-reason
1896     Service_Affecting: False
1897
1898 700.120:
1899     Type: Log
1900     Description: Pause complete for instance <instance_name> now paused on host <host_name>
1901     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1902     Severity: critical
1903     Alarm_Type: equipment
1904     Probable_Cause: unspecified-reason
1905     Service_Affecting: False
1906
1907 700.121:
1908     Type: Log
1909     Description: "Unpause issued <by <tenant_name>|by the system>  against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
1910     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1911     Severity: critical
1912     Alarm_Type: equipment
1913     Probable_Cause: unspecified-reason
1914     Service_Affecting: False
1915
1916 700.122:
1917     Type: Log
1918     Description: Unpause inprogress for instance <instance_name> on host <host_name>
1919     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1920     Severity: critical
1921     Alarm_Type: equipment
1922     Probable_Cause: unspecified-reason
1923     Service_Affecting: False
1924
1925 700.123:
1926     Type: Log
1927     Description: "Unpause rejected for instance <instance_name> paused on host <host_name>[, reason = <reason_text>]"
1928     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1929     Severity: critical
1930     Alarm_Type: equipment
1931     Probable_Cause: unspecified-reason
1932     Service_Affecting: False
1933
1934 700.124:
1935     Type: Log
1936     Description: "Unpause cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
1937     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1938     Severity: critical
1939     Alarm_Type: equipment
1940     Probable_Cause: unspecified-reason
1941     Service_Affecting: False
1942
1943 700.125:
1944     Type: Log
1945     Description: "Unpause failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
1946     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1947     Severity: critical
1948     Alarm_Type: equipment
1949     Probable_Cause: unspecified-reason
1950     Service_Affecting: False
1951
1952 700.126:
1953     Type: Log
1954     Description: Unpause complete for instance <instance_name> now enabled on host <host_name>
1955     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1956     Severity: critical
1957     Alarm_Type: equipment
1958     Probable_Cause: unspecified-reason
1959     Service_Affecting: False
1960
1961 700.127:
1962     Type: Log
1963     Description: "Suspend issued <by <tenant_name>|by the system>  against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
1964     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1965     Severity: critical
1966     Alarm_Type: equipment
1967     Probable_Cause: unspecified-reason
1968     Service_Affecting: False
1969
1970 700.128:
1971     Type: Log
1972     Description: Suspend inprogress for instance <instance_name> on host <host_name>
1973     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1974     Severity: critical
1975     Alarm_Type: equipment
1976     Probable_Cause: unspecified-reason
1977     Service_Affecting: False
1978
1979 700.129:
1980     Type: Log
1981     Description: "Suspend rejected for instance <instance_name> enabled on host <host_name>[, reason = <reason_text>]"
1982     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1983     Severity: critical
1984     Alarm_Type: equipment
1985     Probable_Cause: unspecified-reason
1986     Service_Affecting: False
1987
1988 700.130:
1989     Type: Log
1990     Description: "Suspend cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
1991     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1992     Severity: critical
1993     Alarm_Type: equipment
1994     Probable_Cause: unspecified-reason
1995     Service_Affecting: False
1996
1997 700.131:
1998     Type: Log
1999     Description: "Suspend failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2000     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2001     Severity: critical
2002     Alarm_Type: equipment
2003     Probable_Cause: unspecified-reason
2004     Service_Affecting: False
2005
2006 700.132:
2007     Type: Log
2008     Description: Suspend complete for instance <instance_name> now suspended on host <host_name>
2009     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2010     Severity: critical
2011     Alarm_Type: equipment
2012     Probable_Cause: unspecified-reason
2013     Service_Affecting: False
2014
2015 700.133:
2016     Type: Log
2017     Description: "Resume issued <by <tenant_name>|by the system>  against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
2018     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2019     Severity: critical
2020     Alarm_Type: equipment
2021     Probable_Cause: unspecified-reason
2022     Service_Affecting: False
2023
2024 700.134:
2025     Type: Log
2026     Description: Resume inprogress for instance <instance_name> on host <host_name>
2027     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2028     Severity: critical
2029     Alarm_Type: equipment
2030     Probable_Cause: unspecified-reason
2031     Service_Affecting: False
2032
2033 700.135:
2034     Type: Log
2035     Description: "Resume rejected for instance <instance_name> suspended on host <host_name>[, reason = <reason_text>]"
2036     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2037     Severity: critical
2038     Alarm_Type: equipment
2039     Probable_Cause: unspecified-reason
2040     Service_Affecting: False
2041
2042 700.136:
2043     Type: Log
2044     Description: "Resume cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2045     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2046     Severity: critical
2047     Alarm_Type: equipment
2048     Probable_Cause: unspecified-reason
2049     Service_Affecting: False
2050
2051 700.137:
2052     Type: Log
2053     Description: "Resume failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2054     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2055     Severity: critical
2056     Alarm_Type: equipment
2057     Probable_Cause: unspecified-reason
2058     Service_Affecting: False
2059
2060 700.138:
2061     Type: Log
2062     Description: Resume complete for instance <instance_name> now enabled on host <host_name>
2063     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2064     Severity: critical
2065     Alarm_Type: equipment
2066     Probable_Cause: unspecified-reason
2067     Service_Affecting: False
2068
2069 700.139:
2070     Type: Log
2071     Description: "Start issued <by <tenant_name>|by the system>  against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
2072     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2073     Severity: critical
2074     Alarm_Type: equipment
2075     Probable_Cause: unspecified-reason
2076     Service_Affecting: False
2077
2078 700.140:
2079     Type: Log
2080     Description: Start inprogress for instance <instance_name> on host <host_name>
2081     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2082     Severity: critical
2083     Alarm_Type: equipment
2084     Probable_Cause: unspecified-reason
2085     Service_Affecting: False
2086
2087 700.141:
2088     Type: Log
2089     Description: "Start rejected for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2090     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2091     Severity: critical
2092     Alarm_Type: equipment
2093     Probable_Cause: unspecified-reason
2094     Service_Affecting: False
2095
2096 700.142:
2097     Type: Log
2098     Description: "Start cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2099     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2100     Severity: critical
2101     Alarm_Type: equipment
2102     Probable_Cause: unspecified-reason
2103     Service_Affecting: False
2104
2105 700.143:
2106     Type: Log
2107     Description: "Start failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2108     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2109     Severity: critical
2110     Alarm_Type: equipment
2111     Probable_Cause: unspecified-reason
2112     Service_Affecting: False
2113
2114 700.144:
2115     Type: Log
2116     Description: Start complete for instance <instance_name> now enabled on host <host_name>
2117     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2118     Severity: critical
2119     Alarm_Type: equipment
2120     Probable_Cause: unspecified-reason
2121     Service_Affecting: False
2122
2123 700.145:
2124     Type: Log
2125     Description: "Stop issued <by <tenant_name>|by the system|by the instance>  against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
2126     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2127     Severity: critical
2128     Alarm_Type: equipment
2129     Probable_Cause: unspecified-reason
2130     Service_Affecting: False
2131
2132 700.146:
2133     Type: Log
2134     Description: Stop inprogress for instance <instance_name> on host <host_name>
2135     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2136     Severity: critical
2137     Alarm_Type: equipment
2138     Probable_Cause: unspecified-reason
2139     Service_Affecting: False
2140
2141 700.147:
2142     Type: Log
2143     Description: "Stop rejected for instance <instance_name> enabled on host <host_name>[, reason = <reason_text>]"
2144     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2145     Severity: critical
2146     Alarm_Type: equipment
2147     Probable_Cause: unspecified-reason
2148     Service_Affecting: False
2149
2150 700.148:
2151     Type: Log
2152     Description: "Stop cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2153     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2154     Severity: critical
2155     Alarm_Type: equipment
2156     Probable_Cause: unspecified-reason
2157     Service_Affecting: False
2158
2159 700.149:
2160     Type: Log
2161     Description: "Stop failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2162     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2163     Severity: critical
2164     Alarm_Type: equipment
2165     Probable_Cause: unspecified-reason
2166     Service_Affecting: False
2167
2168 700.150:
2169     Type: Log
2170     Description: Stop complete for instance <instance_name> now disabled on host <host_name>
2171     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2172     Severity: critical
2173     Alarm_Type: equipment
2174     Probable_Cause: unspecified-reason
2175     Service_Affecting: False
2176
2177 700.151:
2178     Type: Log
2179     Description: "Live-Migrate issued <by <tenant_name>|by the system>  against instance <instance_name> owned by <tenant_name> from host <host_name>[, reason = <reason_text>]"
2180     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2181     Severity: critical
2182     Alarm_Type: equipment
2183     Probable_Cause: unspecified-reason
2184     Service_Affecting: False
2185
2186 700.152:
2187     Type: Log
2188     Description: Live-Migrate inprogress for instance <instance_name> from host <host_name>
2189     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2190     Severity: critical
2191     Alarm_Type: equipment
2192     Probable_Cause: unspecified-reason
2193     Service_Affecting: False
2194
2195 700.153:
2196     Type: Log
2197     Description: "Live-Migrate rejected for instance <instance_name> now on host <host_name>[, reason = <reason_text>]"
2198     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2199     Severity: critical
2200     Alarm_Type: equipment
2201     Probable_Cause: unspecified-reason
2202     Service_Affecting: False
2203
2204 700.154:
2205     Type: Log
2206     Description: "Live-Migrate cancelled for instance <instance_name> now on host <host_name>[, reason = <reason_text>]"
2207     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2208     Severity: critical
2209     Alarm_Type: equipment
2210     Probable_Cause: unspecified-reason
2211     Service_Affecting: False
2212
2213 700.155:
2214     Type: Log
2215     Description: "Live-Migrate failed for instance <instance_name> now on host <host_name>[, reason = <reason_text>]"
2216     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2217     Severity: critical
2218     Alarm_Type: equipment
2219     Probable_Cause: unspecified-reason
2220     Service_Affecting: False
2221
2222 700.156:
2223     Type: Log
2224     Description: Live-Migrate complete for instance <instance_name> now enabled on host <host_name>
2225     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2226     Severity: critical
2227     Alarm_Type: equipment
2228     Probable_Cause: unspecified-reason
2229     Service_Affecting: False
2230
2231 700.157:
2232     Type: Log
2233     Description: "Cold-Migrate issued <by <tenant_name>|by the system>  against instance <instance_name> owned by <tenant_name> from host <host_name>[, reason = <reason_text>]"
2234     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2235     Severity: critical
2236     Alarm_Type: equipment
2237     Probable_Cause: unspecified-reason
2238     Service_Affecting: False
2239
2240 700.158:
2241     Type: Log
2242     Description: Cold-Migrate inprogress for instance <instance_name> from host <host_name>
2243     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2244     Severity: critical
2245     Alarm_Type: equipment
2246     Probable_Cause: unspecified-reason
2247     Service_Affecting: False
2248
2249 700.159:
2250     Type: Log
2251     Description: "Cold-Migrate rejected for instance <instance_name> now on host <host_name>[, reason = <reason_text>]"
2252     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2253     Severity: critical
2254     Alarm_Type: equipment
2255     Probable_Cause: unspecified-reason
2256     Service_Affecting: False
2257
2258 700.160:
2259     Type: Log
2260     Description: "Cold-Migrate cancelled for instance <instance_name> now on host <host_name>[, reason = <reason_text>]"
2261     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2262     Severity: critical
2263     Alarm_Type: equipment
2264     Probable_Cause: unspecified-reason
2265     Service_Affecting: False
2266
2267 700.161:
2268     Type: Log
2269     Description: "Cold-Migrate failed for instance <instance_name> now on host <host_name>[, reason = <reason_text>]"
2270     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2271     Severity: critical
2272     Alarm_Type: equipment
2273     Probable_Cause: unspecified-reason
2274     Service_Affecting: False
2275
2276 700.162:
2277     Type: Log
2278     Description: Cold-Migrate complete for instance <instance_name> now enabled on host <host_name>
2279     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2280     Severity: critical
2281     Alarm_Type: equipment
2282     Probable_Cause: unspecified-reason
2283     Service_Affecting: False
2284
2285 700.163:
2286     Type: Log
2287     Description: "Cold-Migrate-Confirm issued <by <tenant_name>|by the system>  against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
2288     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2289     Severity: critical
2290     Alarm_Type: equipment
2291     Probable_Cause: unspecified-reason
2292     Service_Affecting: False
2293
2294 700.164:
2295     Type: Log
2296     Description: Cold-Migrate-Confirm inprogress for instance <instance_name> on host <host_name>
2297     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2298     Severity: critical
2299     Alarm_Type: equipment
2300     Probable_Cause: unspecified-reason
2301     Service_Affecting: False
2302
2303 700.165:  # Log record: cold-migrate-confirm rejected for an instance
2304     Type: Log
2305     Description: "Cold-Migrate-Confirm rejected for instance <instance_name> now enabled on host <host_name>[, reason = <reason_text>]"
2306     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2307     Severity: critical
2308     Alarm_Type: equipment
2309     Probable_Cause: unspecified-reason
2310     Service_Affecting: False
2311
2312 700.166:  # Log record: cold-migrate-confirm cancelled for an instance
2313     Type: Log
2314     Description: "Cold-Migrate-Confirm cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2315     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2316     Severity: critical
2317     Alarm_Type: equipment
2318     Probable_Cause: unspecified-reason
2319     Service_Affecting: False
2320
2321 700.167:  # Log record: cold-migrate-confirm failed for an instance
2322     Type: Log
2323     Description: "Cold-Migrate-Confirm failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2324     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2325     Severity: critical
2326     Alarm_Type: equipment
2327     Probable_Cause: unspecified-reason
2328     Service_Affecting: False
2329
2330 700.168:  # Log record: cold-migrate-confirm complete, instance enabled on the reported host
2331     Type: Log
2332     Description: Cold-Migrate-Confirm complete for instance <instance_name> enabled on host <host_name>
2333     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2334     Severity: critical
2335     Alarm_Type: equipment
2336     Probable_Cause: unspecified-reason
2337     Service_Affecting: False
2338
2339 700.169:  # Log record: cold-migrate-revert issued by a tenant or by the system
2340     Type: Log
2341     Description: "Cold-Migrate-Revert issued <by <tenant_name>|by the system>  against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
2342     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2343     Severity: critical
2344     Alarm_Type: equipment
2345     Probable_Cause: unspecified-reason
2346     Service_Affecting: False
2347
2348 700.170:  # NOTE(review): unquoted, YAML loaders resolve this key as float 700.17 (trailing zero lost); confirm the consumer re-pads to three decimals
2349     Type: Log  # Log record: cold-migrate-revert in progress for an instance
2350     Description: Cold-Migrate-Revert inprogress for instance <instance_name> from host <host_name>
2351     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2352     Severity: critical
2353     Alarm_Type: equipment
2354     Probable_Cause: unspecified-reason
2355     Service_Affecting: False
2356
2357 700.171:  # Log record: cold-migrate-revert rejected for an instance
2358     Type: Log
2359     Description: "Cold-Migrate-Revert rejected for instance <instance_name> now on host <host_name>[, reason = <reason_text>]"
2360     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2361     Severity: critical
2362     Alarm_Type: equipment
2363     Probable_Cause: unspecified-reason
2364     Service_Affecting: False
2365
2366 700.172:  # Log record: cold-migrate-revert cancelled for an instance
2367     Type: Log
2368     Description: "Cold-Migrate-Revert cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2369     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2370     Severity: critical
2371     Alarm_Type: equipment
2372     Probable_Cause: unspecified-reason
2373     Service_Affecting: False
2374
2375 700.173:  # Log record: cold-migrate-revert failed for an instance
2376     Type: Log
2377     Description: "Cold-Migrate-Revert failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2378     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2379     Severity: critical
2380     Alarm_Type: equipment
2381     Probable_Cause: unspecified-reason
2382     Service_Affecting: False
2383
2384 700.174:  # Log record: cold-migrate-revert complete, instance enabled on the reported host
2385     Type: Log
2386     Description: Cold-Migrate-Revert complete for instance <instance_name> now enabled on host <host_name>
2387     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2388     Severity: critical
2389     Alarm_Type: equipment
2390     Probable_Cause: unspecified-reason
2391     Service_Affecting: False
2392
2393 700.175:  # Log record: evacuate issued by a tenant or by the system
2394     Type: Log
2395     Description: "Evacuate issued <by <tenant_name>|by the system>  against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
2396     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2397     Severity: critical
2398     Alarm_Type: equipment
2399     Probable_Cause: unspecified-reason
2400     Service_Affecting: False
2401
2402 700.176:  # Log record: an instance is being evacuated from a host
2403     Type: Log
2404     Description: Evacuating instance <instance_name> owned by <tenant_name> from host <host_name>
2405     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2406     Severity: critical
2407     Alarm_Type: equipment
2408     Probable_Cause: unspecified-reason
2409     Service_Affecting: False
2410
2411 700.177:  # Log record: evacuate rejected for an instance
2412     Type: Log
2413     Description: "Evacuate rejected for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2414     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2415     Severity: critical
2416     Alarm_Type: equipment
2417     Probable_Cause: unspecified-reason
2418     Service_Affecting: False
2419
2420 700.178:  # Log record: evacuate cancelled for an instance
2421     Type: Log
2422     Description: "Evacuate cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2423     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2424     Severity: critical
2425     Alarm_Type: equipment
2426     Probable_Cause: unspecified-reason
2427     Service_Affecting: False
2428
2429 700.179:  # Log record: evacuate failed for an instance
2430     Type: Log
2431     Description: "Evacuate failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2432     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2433     Severity: critical
2434     Alarm_Type: equipment
2435     Probable_Cause: unspecified-reason
2436     Service_Affecting: False
2437
2438 700.180:  # NOTE(review): unquoted, YAML loaders resolve this key as float 700.18 (trailing zero lost); confirm the consumer re-pads to three decimals
2439     Type: Log  # Log record: evacuate complete, instance enabled on the reported host
2440     Description: Evacuate complete for instance <instance_name> now enabled on host <host_name>
2441     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2442     Severity: critical
2443     Alarm_Type: equipment
2444     Probable_Cause: unspecified-reason
2445     Service_Affecting: False
2446
2447 700.181:  # Log record: soft or hard reboot issued by a tenant, the system, or the instance
2448     Type: Log
2449     Description: "Reboot <(soft-reboot)|(hard-reboot)> issued <by <tenant_name>|by the system|by the instance>  against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
2450     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2451     Severity: critical
2452     Alarm_Type: equipment
2453     Probable_Cause: unspecified-reason
2454     Service_Affecting: False
2455
2456 700.182:  # Log record: reboot in progress for an instance
2457     Type: Log
2458     Description: Reboot inprogress for instance <instance_name> on host <host_name>
2459     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2460     Severity: critical
2461     Alarm_Type: equipment
2462     Probable_Cause: unspecified-reason
2463     Service_Affecting: False
2464
2465 700.183:  # Log record: reboot rejected for an instance
2466     Type: Log
2467     Description: "Reboot rejected for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2468     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2469     Severity: critical
2470     Alarm_Type: equipment
2471     Probable_Cause: unspecified-reason
2472     Service_Affecting: False
2473
2474 700.184:  # Log record: reboot cancelled for an instance
2475     Type: Log
2476     Description: "Reboot cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2477     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2478     Severity: critical
2479     Alarm_Type: equipment
2480     Probable_Cause: unspecified-reason
2481     Service_Affecting: False
2482
2483 700.185:  # Log record: reboot failed for an instance
2484     Type: Log
2485     Description: "Reboot failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2486     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2487     Severity: critical
2488     Alarm_Type: equipment
2489     Probable_Cause: unspecified-reason
2490     Service_Affecting: False
2491
2492 700.186:  # Log record: reboot complete, instance enabled on the reported host
2493     Type: Log
2494     Description: Reboot complete for instance <instance_name> now enabled on host <host_name>
2495     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2496     Severity: critical
2497     Alarm_Type: equipment
2498     Probable_Cause: unspecified-reason
2499     Service_Affecting: False
2500
2501 700.187:  # Log record: rebuild issued by a tenant or by the system, naming the image used
2502     Type: Log
2503     Description: "Rebuild issued <by <tenant_name>|by the system> against instance <instance_name> using image <image_name> on host <host_name>[, reason = <reason_text>]"
2504     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2505     Severity: critical
2506     Alarm_Type: equipment
2507     Probable_Cause: unspecified-reason
2508     Service_Affecting: False
2509
2510 700.188:  # Log record: rebuild in progress for an instance
2511     Type: Log
2512     Description: Rebuild inprogress for instance <instance_name> on host <host_name>
2513     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2514     Severity: critical
2515     Alarm_Type: equipment
2516     Probable_Cause: unspecified-reason
2517     Service_Affecting: False
2518
2519 700.189:  # Log record: rebuild rejected for an instance
2520     Type: Log
2521     Description: "Rebuild rejected for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2522     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2523     Severity: critical
2524     Alarm_Type: equipment
2525     Probable_Cause: unspecified-reason
2526     Service_Affecting: False
2527
2528 700.190:  # NOTE(review): unquoted, YAML loaders resolve this key as float 700.19 (trailing zero lost); confirm the consumer re-pads to three decimals
2529     Type: Log  # Log record: rebuild cancelled for an instance
2530     Description: "Rebuild cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2531     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2532     Severity: critical
2533     Alarm_Type: equipment
2534     Probable_Cause: unspecified-reason
2535     Service_Affecting: False
2536
2537 700.191:  # Log record: rebuild failed for an instance
2538     Type: Log
2539     Description: "Rebuild failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2540     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2541     Severity: critical
2542     Alarm_Type: equipment
2543     Probable_Cause: unspecified-reason
2544     Service_Affecting: False
2545
2546 700.192:  # Log record: rebuild complete, instance enabled on the reported host
2547     Type: Log
2548     Description: Rebuild complete for instance <instance_name> now enabled on host <host_name>
2549     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2550     Severity: critical
2551     Alarm_Type: equipment
2552     Probable_Cause: unspecified-reason
2553     Service_Affecting: False
2554
2555 700.193:  # Log record: resize issued by a tenant or by the system
2556     Type: Log
2557     Description: "Resize issued <by <tenant_name>|by the system>  against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
2558     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2559     Severity: critical
2560     Alarm_Type: equipment
2561     Probable_Cause: unspecified-reason
2562     Service_Affecting: False
2563
2564 700.194:  # Log record: resize in progress for an instance
2565     Type: Log
2566     Description: Resize inprogress for instance <instance_name> on host <host_name>
2567     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2568     Severity: critical
2569     Alarm_Type: equipment
2570     Probable_Cause: unspecified-reason
2571     Service_Affecting: False
2572
2573 700.195:  # Log record: resize rejected for an instance
2574     Type: Log
2575     Description: "Resize rejected for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2576     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2577     Severity: critical
2578     Alarm_Type: equipment
2579     Probable_Cause: unspecified-reason
2580     Service_Affecting: False
2581
2582 700.196:  # Log record: resize cancelled for an instance
2583     Type: Log
2584     Description: "Resize cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2585     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2586     Severity: critical
2587     Alarm_Type: equipment
2588     Probable_Cause: unspecified-reason
2589     Service_Affecting: False
2590
2591 700.197:  # Log record: resize failed for an instance
2592     Type: Log
2593     Description: "Resize failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2594     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2595     Severity: critical
2596     Alarm_Type: equipment
2597     Probable_Cause: unspecified-reason
2598     Service_Affecting: False
2599
2600 700.198:  # Log record: resize complete, instance enabled and waiting for confirmation
2601     Type: Log
2602     Description: Resize complete for instance <instance_name> enabled on host <host_name> waiting for confirmation
2603     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2604     Severity: critical
2605     Alarm_Type: equipment
2606     Probable_Cause: unspecified-reason
2607     Service_Affecting: False
2608
2609 700.199:  # Log record: resize-confirm issued by a tenant or by the system
2610     Type: Log
2611     Description: "Resize-Confirm issued <by <tenant_name>|by the system>  against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
2612     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2613     Severity: critical
2614     Alarm_Type: equipment
2615     Probable_Cause: unspecified-reason
2616     Service_Affecting: False
2617
2618 700.200:  # NOTE(review): unquoted, YAML loaders resolve this key as float 700.2 (trailing zeros lost); confirm the consumer re-pads to three decimals
2619     Type: Log  # Log record: resize-confirm in progress for an instance
2620     Description: Resize-Confirm inprogress for instance <instance_name> on host <host_name>
2621     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2622     Severity: critical
2623     Alarm_Type: equipment
2624     Probable_Cause: unspecified-reason
2625     Service_Affecting: False
2626
2627 700.201:  # Log record: resize-confirm rejected for an instance
2628     Type: Log
2629     Description: "Resize-Confirm rejected for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2630     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2631     Severity: critical
2632     Alarm_Type: equipment
2633     Probable_Cause: unspecified-reason
2634     Service_Affecting: False
2635
2636 700.202:  # Log record: resize-confirm cancelled for an instance
2637     Type: Log
2638     Description: "Resize-Confirm cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2639     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2640     Severity: critical
2641     Alarm_Type: equipment
2642     Probable_Cause: unspecified-reason
2643     Service_Affecting: False
2644
2645 700.203:  # Log record: resize-confirm failed for an instance
2646     Type: Log
2647     Description: "Resize-Confirm failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2648     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2649     Severity: critical
2650     Alarm_Type: equipment
2651     Probable_Cause: unspecified-reason
2652     Service_Affecting: False
2653
2654 700.204:  # Log record: resize-confirm complete, instance enabled on the reported host
2655     Type: Log
2656     Description: Resize-Confirm complete for instance <instance_name> enabled on host <host_name>
2657     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2658     Severity: critical
2659     Alarm_Type: equipment
2660     Probable_Cause: unspecified-reason
2661     Service_Affecting: False
2662
2663 700.205:  # Log record: resize-revert issued by a tenant or by the system
2664     Type: Log
2665     Description: "Resize-Revert issued <by <tenant_name>|by the system>  against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
2666     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2667     Severity: critical
2668     Alarm_Type: equipment
2669     Probable_Cause: unspecified-reason
2670     Service_Affecting: False
2671
2672 700.206:  # Log record: resize-revert in progress for an instance
2673     Type: Log
2674     Description: Resize-Revert inprogress for instance <instance_name> on host <host_name>
2675     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2676     Severity: critical
2677     Alarm_Type: equipment
2678     Probable_Cause: unspecified-reason
2679     Service_Affecting: False
2680
2681 700.207:  # Log record: resize-revert rejected for an instance; this text also names the owning tenant
2682     Type: Log
2683     Description: "Resize-Revert rejected for instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
2684     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2685     Severity: critical
2686     Alarm_Type: equipment
2687     Probable_Cause: unspecified-reason
2688     Service_Affecting: False
2689
2690 700.208:  # Log record: resize-revert cancelled for an instance
2691     Type: Log
2692     Description: "Resize-Revert cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2693     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2694     Severity: critical
2695     Alarm_Type: equipment
2696     Probable_Cause: unspecified-reason
2697     Service_Affecting: False
2698
2699 700.209:  # Log record: resize-revert failed for an instance
2700     Type: Log
2701     Description: "Resize-Revert failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2702     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2703     Severity: critical
2704     Alarm_Type: equipment
2705     Probable_Cause: unspecified-reason
2706     Service_Affecting: False
2707
2708 700.210:  # NOTE(review): unquoted, YAML loaders resolve this key as float 700.21 (trailing zero lost); confirm the consumer re-pads to three decimals
2709     Type: Log  # Log record: resize-revert complete, instance enabled on the reported host
2710     Description: Resize-Revert complete for instance <instance_name> enabled on host <host_name>
2711     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2712     Severity: critical
2713     Alarm_Type: equipment
2714     Probable_Cause: unspecified-reason
2715     Service_Affecting: False
2716
2717 700.211:  # Log record: guest heartbeat established for an instance (severity major)
2718     Type: Log
2719     Description: Guest Heartbeat established for instance <instance_name> on host <host_name>
2720     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2721     Severity: major
2722     Alarm_Type: equipment
2723     Probable_Cause: unspecified-reason
2724     Service_Affecting: False
2725
2726 700.212:  # Log record: guest heartbeat disconnected for an instance (severity major)
2727     Type: Log
2728     Description: Guest Heartbeat disconnected for instance <instance_name> on host <host_name>
2729     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2730     Severity: major
2731     Alarm_Type: equipment
2732     Probable_Cause: unspecified-reason
2733     Service_Affecting: False
2734
2735 700.213:  # Log record: guest heartbeat failed for an instance (no host placeholder in this text)
2736     Type: Log
2737     Description: "Guest Heartbeat failed for instance <instance_name>[, reason = <reason_text>]"
2738     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2739     Severity: critical
2740     Alarm_Type: equipment
2741     Probable_Cause: unspecified-reason
2742     Service_Affecting: False
2743
2744 700.214:  # Log record: an instance has been renamed
2745     Type: Log
2746     Description: Instance <instance_name> has been renamed to <new_instance_name> owned by <tenant_name> on host <host_name>
2747     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2748     Severity: critical
2749     Alarm_Type: equipment
2750     Probable_Cause: unspecified-reason
2751     Service_Affecting: False
2752
2753 700.215:  # Log record: guest health check failed for an instance
2754     Type: Log
2755     Description: "Guest Health Check failed for instance <instance_name>[, reason = <reason_text>]"
2756     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2757     Severity: critical
2758     Alarm_Type: equipment
2759     Probable_Cause: unspecified-reason
2760     Service_Affecting: False
2761
2762 700.216:  # Log record: entered multi-node recovery mode; entity is subsystem=vim, not an instance
2763     Type: Log
2764     Description: "Entered Multi-Node Recovery Mode"
2765     Entity_Instance_ID: subsystem=vim
2766     Severity: critical
2767     Alarm_Type: equipment
2768     Probable_Cause: unspecified-reason
2769     Service_Affecting: False
2770
2771
2772 700.217:  # Log record: exited multi-node recovery mode; entity is subsystem=vim, not an instance
2773     Type: Log
2774     Description: "Exited Multi-Node Recovery Mode"
2775     Entity_Instance_ID: subsystem=vim
2776     Severity: critical
2777     Alarm_Type: equipment
2778     Probable_Cause: unspecified-reason
2779     Service_Affecting: False
2780
2781 #---------------------------------------------------------------------------
2782 #   APPLICATION
2783 #---------------------------------------------------------------------------
2784
2785 750.001:  # Alarm: upload of an application failed (entity k8s_application=<appname>)
2786     Type: Alarm
2787     Description: "Application Upload Failure"
2788     Entity_Instance_ID: k8s_application=<appname>
2789     Severity: warning
2790     Proposed_Repair_Action: "Check system inventory log for cause."
2791     Maintenance_Action:  # no value given
2792     Inhibit_Alarms:  # no value given
2793     Alarm_Type: processing-error
2794     Probable_Cause: unknown
2795     Service_Affecting: False
2796     Suppression: True
2797     Management_Affecting_Severity: none
2798     Degrade_Affecting_Severity: none
2799
2800 750.002:  # Alarm: apply of an application failed; marked service affecting
2801     Type: Alarm
2802     Description: "Application Apply Failure"
2803     Entity_Instance_ID: k8s_application=<appname>
2804     Severity: major
2805     Proposed_Repair_Action: "Retry applying the application. Check application is managed by the system application framework.
2806                              If the issue persists, please check system inventory log for cause."
2807     Maintenance_Action:  # no value given
2808     Inhibit_Alarms:  # no value given
2809     Alarm_Type: processing-error
2810     Probable_Cause: unknown
2811     Service_Affecting: True
2812     Suppression: True
2813     Management_Affecting_Severity: none
2814     Degrade_Affecting_Severity: none
2815
2816 750.003:  # Alarm: removal of an application failed; marked service affecting
2817     Type: Alarm
2818     Description: "Application Remove Failure"
2819     Entity_Instance_ID: k8s_application=<appname>
2820     Severity: major
2821     Proposed_Repair_Action: "Retry removing the application. If the issue persists, please check system inventory log for cause."
2822     Maintenance_Action:  # no value given
2823     Inhibit_Alarms:  # no value given
2824     Alarm_Type: processing-error
2825     Probable_Cause: unknown
2826     Service_Affecting: True
2827     Suppression: True
2828     Management_Affecting_Severity: none
2829     Degrade_Affecting_Severity: none
2830
2831 750.004:  # Alarm: application apply in progress; the repair text says no action is required
2832     Type: Alarm
2833     Description: "Application Apply In Progress"
2834     Entity_Instance_ID: k8s_application=<appname>
2835     Severity: warning
2836     Proposed_Repair_Action: "No action required."
2837     Maintenance_Action:  # no value given
2838     Inhibit_Alarms:  # no value given
2839     Alarm_Type: other
2840     Probable_Cause: unknown
2841     Service_Affecting: True
2842     Suppression: True
2843     Management_Affecting_Severity: warning
2844     Degrade_Affecting_Severity: none
2845
2846 750.005:  # Alarm: application update in progress; the repair text says no action is required
2847     Type: Alarm
2848     Description: "Application Update In Progress"
2849     Entity_Instance_ID: k8s_application=<appname>
2850     Severity: warning
2851     Proposed_Repair_Action: "No action required."
2852     Maintenance_Action:  # no value given
2853     Inhibit_Alarms:  # no value given
2854     Alarm_Type: other
2855     Probable_Cause: unknown
2856     Service_Affecting: True
2857     Suppression: True
2858     Management_Affecting_Severity: warning
2859     Degrade_Affecting_Severity: none
2860
2861 750.006:  # Alarm: an automatic application re-apply is pending
2862     Type: Alarm
2863     Description: "Automatic Application Re-Apply Is Pending"
2864     Entity_Instance_ID: k8s_application=<appname>
2865     Severity: warning
2866     Proposed_Repair_Action: "Ensure all hosts are either locked or unlocked.  When the system is stable the application will be automatically reapplied."
2867     Maintenance_Action:  # no value given
2868     Inhibit_Alarms:  # no value given
2869     Alarm_Type: other
2870     Probable_Cause: unknown
2871     Service_Affecting: False
2872     Suppression: True
2873     Management_Affecting_Severity: none
2874     Degrade_Affecting_Severity: none
2875
2876 #---------------------------------------------------------------------------
2877 #   STORAGE
2878 #---------------------------------------------------------------------------
2879
2880 800.001:  # Alarm: storage alarm condition on a cluster; two severities are listed (critical, major)
2881     Type: Alarm
2882     Description: |-
2883         Storage Alarm Condition:
2884         1 mons down, quorum 1,2 controller-1,storage-0
2885     Entity_Instance_ID: cluster=<dist-fs-uuid>
2886     Severity: [critical, major]
2887     Proposed_Repair_Action: "If problem persists, contact next level of support."
2888     Maintenance_Action:  # no value given
2889     Inhibit_Alarms:  # no value given
2890     Alarm_Type: equipment
2891     Probable_Cause: equipment-malfunction
2892     Service_Affecting:  # mapping keyed by severity, one entry per value in the Severity list
2893         critical: True
2894         major: False
2895     Suppression: False
2896     Management_Affecting_Severity: warning
2897     Degrade_Affecting_Severity: none
2898
2899 800.010:  # NOTE(review): unquoted, YAML loaders resolve this key as float 800.01 (trailing zero lost); confirm the consumer re-pads to three decimals
2900     Type: Alarm  # Alarm: no available OSDs in a storage replication group (potential data loss)
2901     Description: |-
2902         Potential data loss. No available OSDs in storage replication group.
2903     Entity_Instance_ID: cluster=<dist-fs-uuid>.peergroup=<group-x>
2904     Severity: [critical]
2905     Proposed_Repair_Action: "Ensure storage hosts from replication group are unlocked and available.
2906                              Check if OSDs of each storage host are up and running.
2907                              If problem persists contact next level of support."
2908     Maintenance_Action:  # no value given
2909     Inhibit_Alarms:  # no value given
2910     Alarm_Type: equipment
2911     Probable_Cause: equipment-malfunction
2912     Service_Affecting:  # mapping keyed by severity; only critical is listed
2913         critical: True
2914     Suppression: False
2915     Management_Affecting_Severity: warning
2916     Degrade_Affecting_Severity: none
2917
2918 800.011:  # Alarm: loss of replication in a storage peergroup
2919     Type: Alarm
2920     Description: |-
2921         Loss of replication in peergroup.
2922     Entity_Instance_ID: cluster=<dist-fs-uuid>.peergroup=<group-x>
2923     Severity: [major]
2924     Proposed_Repair_Action: "Ensure storage hosts from replication group are unlocked and available.
2925                              Check if OSDs of each storage host are up and running.
2926                              If problem persists contact next level of support."
2927     Maintenance_Action:  # no value given
2928     Inhibit_Alarms:  # no value given
2929     Alarm_Type: equipment
2930     Probable_Cause: equipment-malfunction
2931     Service_Affecting:  # mapping keyed by severity; only major is listed
2932         major: True
2933     Suppression: False
2934     Management_Affecting_Severity: warning
2935     Degrade_Affecting_Severity: none
2936
2937 800.002:  # Alarm: image storage and upload problems. Description, Entity_Instance_ID and Alarm_Type each hold 11 entries; presumably entry N of each list belongs together (confirm against the consumer)
2938     Type: Alarm
2939     Description: ["Image storage media is full: There is not enough disk space on the image storage media.",
2940                   "Instance <instance name> snapshot failed: There is not enough disk space on the image storage media.",
2941                   "Supplied <attrs> (<supplied>) and <attrs> generated from uploaded image (<actual>) did not match. Setting image status to 'killed'.",
2942                   "Error in store configuration. Adding images to store is disabled.",
2943                   "Forbidden upload attempt: <exception>",
2944                   "Insufficient permissions on image storage media: <exception>",
2945                   "Denying attempt to upload image larger than <size> bytes.",
2946                   "Denying attempt to upload image because it exceeds the quota: <exception>",
2947                   "Received HTTP error while uploading image <image_id>",
2948                   "Client disconnected before sending all data to backend",
2949                   "Failed to upload image <image_id>"]
2950     Entity_Instance_ID: ["image=<image-uuid>, instance=<instance-uuid>",
2951                          "tenant=<tenant-uuid>, instance=<instance-uuid>",
2952                          "image=<image-uuid>, instance=<instance-uuid>",
2953                          "image=<image-uuid>, instance=<instance-uuid>",
2954                          "image=<image-uuid>, instance=<instance-uuid>",
2955                          "image=<image-uuid>, instance=<instance-uuid>",
2956                          "image=<image-uuid>, instance=<instance-uuid>",
2957                          "image=<image-uuid>, instance=<instance-uuid>",
2958                          "image=<image-uuid>, instance=<instance-uuid>",
2959                          "image=<image-uuid>, instance=<instance-uuid>",
2960                          "image=<image-uuid>, instance=<instance-uuid>"]
2961     Alarm_Type: [physical-violation,
2962                  physical-violation,
2963                  integrity-violation,
2964                  integrity-violation,
2965                  security-service-or-mechanism-violation,
2966                  security-service-or-mechanism-violation,
2967                  security-service-or-mechanism-violation,
2968                  security-service-or-mechanism-violation,
2969                  communication,
2970                  communication,
2971                  operational-violation]
2972     Severity: warning
2973     Proposed_Repair_Action:  # no value given
2974     Maintenance_Action:  # no value given
2975     Inhibit_Alarms:  # no value given
2976     Probable_Cause: unspecified-reason
2977     Service_Affecting: False
2978     Suppression: False
2979     Management_Affecting_Severity: none
2980     Degrade_Affecting_Severity: none
2981
2982 800.100:  # NOTE(review): unquoted, YAML loaders resolve this key as float 800.1 (trailing zeros lost); confirm the consumer re-pads to three decimals
2983     Type: Alarm  # Alarm: Cinder I/O congestion above normal range and building (severity major)
2984     Description: |-
2985         Storage Alarm Condition:
2986         Cinder I/O Congestion is above normal range and is building
2987     Entity_Instance_ID: cinder_io_monitor
2988     Severity: major
2989     Proposed_Repair_Action: "Reduce the I/O load on the Cinder LVM backend. Use
2990                              Cinder QoS mechanisms on high usage volumes."
2991     Maintenance_Action:  # no value given
2992     Inhibit_Alarms:  # no value given
2993     Alarm_Type: qos
2994     Probable_Cause: congestion
2995     Service_Affecting: False
2996     Suppression: False
2997     Management_Affecting_Severity: none
2998     Degrade_Affecting_Severity: none
2999
3000 800.101:  # Alarm: Cinder I/O congestion high and impacting guest performance (severity critical)
3001     Type: Alarm
3002     Description: |-
3003         Storage Alarm Condition:
3004         Cinder I/O Congestion is high and impacting guest performance
3005     Entity_Instance_ID: cinder_io_monitor
3006     Severity: critical
3007     Proposed_Repair_Action: "Reduce the I/O load on the Cinder LVM backend.
3008                              Cinder actions may fail until congestion is reduced.
3009                              Use Cinder QoS mechanisms on high usage volumes."
3010     Maintenance_Action:  # no value given
3011     Inhibit_Alarms:  # no value given
3012     Alarm_Type: qos
3013     Probable_Cause: congestion
3014     Service_Affecting: False
3015     Suppression: False
3016     Management_Affecting_Severity: warning
3017     Degrade_Affecting_Severity: none
3018
3019 800.103:  # Alarm: LVM thin pool metadata usage exceeded its threshold. The bracketed pair in Description sits inside a literal block, so it is plain text, not a YAML list
3020     Type: Alarm
3021     Description: |-
3022         Storage Alarm Condition:
3023         [ Metadata usage for LVM thin pool <VG name>/<Pool name> exceeded threshold and automatic extension failed,
3024           Metadata usage for LVM thin pool <VG name>/<Pool name> exceeded threshold ]; threshold x%, actual y%.
3025     Entity_Instance_ID: <hostname>.lvmthinpool=<VG name>/<Pool name>
3026     Severity: critical
3027     Proposed_Repair_Action: "Increase Storage Space Allotment for Cinder on the 'lvm' backend.
3028                              Consult the System Administration Manual for more details.
3029                              If problem persists, contact next level of support."
3030     Maintenance_Action:  # no value given
3031     Inhibit_Alarms:  # no value given
3032     Alarm_Type: operational-violation
3033     Probable_Cause: threshold-crossed
3034     Service_Affecting: False
3035     Suppression: False
3036     Management_Affecting_Severity: major
3037     Degrade_Affecting_Severity: none
3038
3039 800.104:  # Alarm: a storage backend configuration failed to apply on a host; marked service affecting
3040     Type: Alarm
3041     Description: |-
3042         Storage Alarm Condition:
3043         <storage-backend-name> configuration failed to apply on host: <host-uuid>.
3044     Entity_Instance_ID: storage_backend=<storage-backend-name>
3045     Severity: critical
3046     Proposed_Repair_Action: "Update backend setting to reapply configuration.
3047                              Consult the System Administration Manual for more details.
3048                              If problem persists, contact next level of support."
3049     Maintenance_Action:  # no value given
3050     Inhibit_Alarms:  # no value given
3051     Alarm_Type: equipment
3052     Probable_Cause: configuration-or-customization-error
3053     Service_Affecting: True
3054     Suppression: False
3055     Management_Affecting_Severity: major
3056     Degrade_Affecting_Severity: none
3057
3058 #---------------------------------------------------------------------------
3059 #   KUBERNETES
3060 #---------------------------------------------------------------------------
3061
3062 850.001:  # Alarm: persistent volume migration error; the repair text names a script to run manually
3063     Type: Alarm
3064     Description: Persistent Volume Migration Error
3065     Entity_Instance_ID: kubernetes=PV-migration-failed
3066     Severity: major
3067     Proposed_Repair_Action: "Manually execute /usr/bin/ceph_k8s_update_monitors.sh
3068                              to confirm PVs are updated, then lock/unlock to clear
3069                              alarms. If problem persists, contact next level of
3070                              support."
3071     Maintenance_Action:  # no value given
3072     Inhibit_Alarms:  # no value given
3073     Alarm_Type: processing-error
3074     Probable_Cause: communication-subsystem-failure
3075     Service_Affecting: False
3076     Suppression: False
3077     Management_Affecting_Severity: none
3078     Degrade_Affecting_Severity: none
3079
3080 #---------------------------------------------------------------------------
3081 #   SOFTWARE
3082 #---------------------------------------------------------------------------
3083
3084 900.001:
3085     Type: Alarm
3086     Description: Patching operation in progress.
3087     Entity_Instance_ID: host=controller
3088     Severity: minor
3089     Proposed_Repair_Action: Complete reboots of affected hosts.
3090     Maintenance_Action:
3091     Inhibit_Alarms:
3092     Alarm_Type: environmental
3093     Probable_Cause: unspecified-reason
3094     Service_Affecting: False
3095     Suppression: False
3096     Management_Affecting_Severity: warning
3097     Degrade_Affecting_Severity: none
3098
3099 900.002:
3100     Type: Alarm
3101     Description: Patch host install failure. Command "sw-patch host-install" failed.
3102     Entity_Instance_ID: host=<hostname>
3103     Severity: major
3104     Proposed_Repair_Action: Undo patching operation. Check patch logs on the target host (i.e. /var/log/patching.log)
3105     Maintenance_Action:
3106     Inhibit_Alarms:
3107     Alarm_Type: environmental
3108     Probable_Cause: unspecified-reason
3109     Service_Affecting: False
3110     Suppression: False
3111     Management_Affecting_Severity: warning
3112     Degrade_Affecting_Severity: none
3113
3114 900.003:
3115     Type: Alarm
3116     Description: A patch with state 'obsolete' in its metadata has been uploaded.
3117     Entity_Instance_ID: host=controller
3118     Severity: warning
3119     Proposed_Repair_Action: Remove and delete obsolete patches.
3120     Maintenance_Action:
3121     Inhibit_Alarms:
3122     Alarm_Type: environmental
3123     Probable_Cause: unspecified-reason
3124     Service_Affecting: False
3125     Suppression: False
3126     Management_Affecting_Severity: warning
3127     Degrade_Affecting_Severity: none
3128
3129 900.004:
3130     Type: Alarm
3131     Description: The upgrade and running software version do not match. Command host-upgrade failed.
3132     Entity_Instance_ID: host=<hostname>
3133     Severity: major
3134     Proposed_Repair_Action: Reinstall host to update applied load.
3135     Maintenance_Action:
3136     Inhibit_Alarms:
3137     Alarm_Type: operational-violation
3138     Probable_Cause: unspecified-reason
3139     Service_Affecting: True
3140     Suppression: False
3141     Management_Affecting_Severity: warning
3142     Degrade_Affecting_Severity: none
3143
3144 900.005:
3145     Type: Alarm
3146     Description: System Upgrade in progress.
3147     Entity_Instance_ID: host=controller
3148     Severity: minor
3149     Proposed_Repair_Action: No action required.
3150     Maintenance_Action:
3151     Inhibit_Alarms:
3152     Alarm_Type: operational-violation
3153     Probable_Cause: unspecified-reason
3154     Service_Affecting: False
3155     Suppression: False
3156     Management_Affecting_Severity: warning
3157     Degrade_Affecting_Severity: none
3158
3159 900.006:
3160     Type: Alarm
3161     Description: Device image update operation in progress.
3162     Entity_Instance_ID: host=controller
3163     Severity: minor
3164     Proposed_Repair_Action: Complete reboots of affected hosts.
3165     Maintenance_Action:
3166     Inhibit_Alarms:
3167     Alarm_Type: environmental
3168     Probable_Cause: unspecified-reason
3169     Service_Affecting: False
3170     Suppression: False
3171     Management_Affecting_Severity: warning
3172     Degrade_Affecting_Severity: none
3173
3174 900.007:
3175     Type: Alarm
3176     Description: Kubernetes upgrade in progress.
3177     Entity_Instance_ID: host=controller
3178     Severity: minor
3179     Proposed_Repair_Action: No action required.
3180     Maintenance_Action:
3181     Inhibit_Alarms:
3182     Alarm_Type: operational-violation
3183     Probable_Cause: unspecified-reason
3184     Service_Affecting: False
3185     Suppression: False
3186     Management_Affecting_Severity: warning
3187     Degrade_Affecting_Severity: none
3188
3189 900.008:
3190     Type: Alarm
3191     Description: Kubernetes rootca update in progress
3192     Entity_Instance_ID: host=controller
3193     Severity: minor
3194     Proposed_Repair_Action: Wait for kubernetes rootca procedure to complete
3195     Maintenance_Action:
3196     Inhibit_Alarms:
3197     Alarm_Type: operational-violation
3198     Probable_Cause: unspecified-reason
3199     Service_Affecting: False
3200     Suppression: False
3201     Management_Affecting_Severity: warning
3202     Degrade_Affecting_Severity: none
3203
3204 900.009:
3205     Type: Alarm
3206     Description: Kubernetes root CA update aborted, certificates may not be fully updated. Command "system kube-rootca-update-abort" has been run.
3207     Entity_Instance_ID: host=controller
3208     Severity: minor
3209     Proposed_Repair_Action: Fully update certificates by a new root CA update.
3210     Maintenance_Action:
3211     Inhibit_Alarms:
3212     Alarm_Type: operational-violation
3213     Probable_Cause: unspecified-reason
3214     Service_Affecting: False
3215     Suppression: False
3216     Management_Affecting_Severity: warning
3217     Degrade_Affecting_Severity: none
3218
3219 900.101:
3220     Type: Alarm
3221     Description: Software patch auto-apply inprogress
3222     Entity_Instance_ID: orchestration=sw-patch
3223     Severity: major
3224     Proposed_Repair_Action: Wait for software patch auto-apply to complete; if problem persists contact next level of support
3225     Maintenance_Action:
3226     Inhibit_Alarms:
3227     Alarm_Type: equipment
3228     Probable_Cause: unspecified-reason
3229     Service_Affecting: True
3230     Suppression: True
3231     Management_Affecting_Severity: warning
3232     Degrade_Affecting_Severity: none
3233
3234 900.102:
3235     Type: Alarm
3236     Description: Software patch auto-apply aborting
3237     Entity_Instance_ID: orchestration=sw-patch
3238     Severity: major
3239     Proposed_Repair_Action: Wait for software patch auto-apply abort to complete; if problem persists contact next level of support
3240     Maintenance_Action:
3241     Inhibit_Alarms:
3242     Alarm_Type: equipment
3243     Probable_Cause: unspecified-reason
3244     Service_Affecting: True
3245     Suppression: True
3246     Management_Affecting_Severity: warning
3247     Degrade_Affecting_Severity: none
3248
3249 900.103:
3250     Type: Alarm
3251     Description: Software patch auto-apply failed. Command "sw-manager patch-strategy apply" failed.
3252     Entity_Instance_ID: orchestration=sw-patch
3253     Severity: critical
3254     Proposed_Repair_Action: Attempt to apply software patches manually; if problem persists contact next level of support
3255     Maintenance_Action:
3256     Inhibit_Alarms:
3257     Alarm_Type: equipment
3258     Probable_Cause: underlying-resource-unavailable
3259     Service_Affecting: True
3260     Suppression: True
3261     Management_Affecting_Severity: warning
3262     Degrade_Affecting_Severity: none
3263
3264 900.111:
3265     Type: Log
3266     Description: Software patch auto-apply start
3267     Entity_Instance_ID: orchestration=sw-patch
3268     Severity: critical
3269     Alarm_Type: equipment
3270     Probable_Cause: unspecified-reason
3271     Service_Affecting: False
3272
3273 900.112:
3274     Type: Log
3275     Description: Software patch auto-apply inprogress
3276     Entity_Instance_ID: orchestration=sw-patch
3277     Severity: critical
3278     Alarm_Type: equipment
3279     Probable_Cause: unspecified-reason
3280     Service_Affecting: False
3281
3282 900.113:
3283     Type: Log
3284     Description: Software patch auto-apply rejected
3285     Entity_Instance_ID: orchestration=sw-patch
3286     Severity: critical
3287     Alarm_Type: equipment
3288     Probable_Cause: unspecified-reason
3289     Service_Affecting: False
3290
3291 900.114:
3292     Type: Log
3293     Description: Software patch auto-apply cancelled
3294     Entity_Instance_ID: orchestration=sw-patch
3295     Severity: critical
3296     Alarm_Type: equipment
3297     Probable_Cause: unspecified-reason
3298     Service_Affecting: False
3299
3300 900.115:
3301     Type: Log
3302     Description: Software patch auto-apply failed
3303     Entity_Instance_ID: orchestration=sw-patch
3304     Severity: critical
3305     Alarm_Type: equipment
3306     Probable_Cause: unspecified-reason
3307     Service_Affecting: False
3308
3309 900.116:
3310     Type: Log
3311     Description: Software patch auto-apply completed
3312     Entity_Instance_ID: orchestration=sw-patch
3313     Severity: critical
3314     Alarm_Type: equipment
3315     Probable_Cause: unspecified-reason
3316     Service_Affecting: False
3317
3318 900.117:
3319     Type: Log
3320     Description: Software patch auto-apply abort
3321     Entity_Instance_ID: orchestration=sw-patch
3322     Severity: critical
3323     Alarm_Type: equipment
3324     Probable_Cause: unspecified-reason
3325     Service_Affecting: False
3326
3327 900.118:
3328     Type: Log
3329     Description: Software patch auto-apply aborting
3330     Entity_Instance_ID: orchestration=sw-patch
3331     Severity: critical
3332     Alarm_Type: equipment
3333     Probable_Cause: unspecified-reason
3334     Service_Affecting: False
3335
3336 900.119:
3337     Type: Log
3338     Description: Software patch auto-apply abort rejected
3339     Entity_Instance_ID: orchestration=sw-patch
3340     Severity: critical
3341     Alarm_Type: equipment
3342     Probable_Cause: unspecified-reason
3343     Service_Affecting: False
3344
3345 900.120:
3346     Type: Log
3347     Description: Software patch auto-apply abort failed
3348     Entity_Instance_ID: orchestration=sw-patch
3349     Severity: critical
3350     Alarm_Type: equipment
3351     Probable_Cause: unspecified-reason
3352     Service_Affecting: False
3353
3354 900.121:
3355     Type: Log
3356     Description: Software patch auto-apply aborted
3357     Entity_Instance_ID: orchestration=sw-patch
3358     Severity: critical
3359     Alarm_Type: equipment
3360     Probable_Cause: unspecified-reason
3361     Service_Affecting: False
3362
3363 900.201:
3364     Type: Alarm
3365     Description: Software upgrade auto-apply inprogress
3366     Entity_Instance_ID: orchestration=sw-upgrade
3367     Severity: major
3368     Proposed_Repair_Action: Wait for software upgrade auto-apply to complete; if problem persists contact next level of support
3369     Maintenance_Action:
3370     Inhibit_Alarms:
3371     Alarm_Type: equipment
3372     Probable_Cause: unspecified-reason
3373     Service_Affecting: True
3374     Suppression: True
3375     Management_Affecting_Severity: warning
3376     Degrade_Affecting_Severity: none
3377
3378 900.202:
3379     Type: Alarm
3380     Description: Software upgrade auto-apply aborting
3381     Entity_Instance_ID: orchestration=sw-upgrade
3382     Severity: major
3383     Proposed_Repair_Action: Wait for software upgrade auto-apply abort to complete; if problem persists contact next level of support
3384     Maintenance_Action:
3385     Inhibit_Alarms:
3386     Alarm_Type: equipment
3387     Probable_Cause: unspecified-reason
3388     Service_Affecting: True
3389     Suppression: True
3390     Management_Affecting_Severity: warning
3391     Degrade_Affecting_Severity: none
3392
3393 900.203:
3394     Type: Alarm
3395     Description: Software upgrade auto-apply failed. Command "sw-manager upgrade-strategy apply" failed.
3396     Entity_Instance_ID: orchestration=sw-upgrade
3397     Severity: critical
3398     Proposed_Repair_Action: Attempt to apply software upgrade manually; if problem persists contact next level of support
3399     Maintenance_Action:
3400     Inhibit_Alarms:
3401     Alarm_Type: equipment
3402     Probable_Cause: underlying-resource-unavailable
3403     Service_Affecting: True
3404     Suppression: True
3405     Management_Affecting_Severity: warning
3406     Degrade_Affecting_Severity: none
3407
3408 900.211:
3409     Type: Log
3410     Description: Software upgrade auto-apply start
3411     Entity_Instance_ID: orchestration=sw-upgrade
3412     Severity: critical
3413     Alarm_Type: equipment
3414     Probable_Cause: unspecified-reason
3415     Service_Affecting: False
3416
3417 900.212:
3418     Type: Log
3419     Description: Software upgrade auto-apply inprogress
3420     Entity_Instance_ID: orchestration=sw-upgrade
3421     Severity: critical
3422     Alarm_Type: equipment
3423     Probable_Cause: unspecified-reason
3424     Service_Affecting: False
3425
3426 900.213:
3427     Type: Log
3428     Description: Software upgrade auto-apply rejected
3429     Entity_Instance_ID: orchestration=sw-upgrade
3430     Severity: critical
3431     Alarm_Type: equipment
3432     Probable_Cause: unspecified-reason
3433     Service_Affecting: False
3434
3435 900.214:
3436     Type: Log
3437     Description: Software upgrade auto-apply cancelled
3438     Entity_Instance_ID: orchestration=sw-upgrade
3439     Severity: critical
3440     Alarm_Type: equipment
3441     Probable_Cause: unspecified-reason
3442     Service_Affecting: False
3443
3444 900.215:
3445     Type: Log
3446     Description: Software upgrade auto-apply failed
3447     Entity_Instance_ID: orchestration=sw-upgrade
3448     Severity: critical
3449     Alarm_Type: equipment
3450     Probable_Cause: unspecified-reason
3451     Service_Affecting: False
3452
3453 900.216:
3454     Type: Log
3455     Description: Software upgrade auto-apply completed
3456     Entity_Instance_ID: orchestration=sw-upgrade
3457     Severity: critical
3458     Alarm_Type: equipment
3459     Probable_Cause: unspecified-reason
3460     Service_Affecting: False
3461
3462 900.217:
3463     Type: Log
3464     Description: Software upgrade auto-apply abort
3465     Entity_Instance_ID: orchestration=sw-upgrade
3466     Severity: critical
3467     Alarm_Type: equipment
3468     Probable_Cause: unspecified-reason
3469     Service_Affecting: False
3470
3471 900.218:
3472     Type: Log
3473     Description: Software upgrade auto-apply aborting
3474     Entity_Instance_ID: orchestration=sw-upgrade
3475     Severity: critical
3476     Alarm_Type: equipment
3477     Probable_Cause: unspecified-reason
3478     Service_Affecting: False
3479
3480 900.219:
3481     Type: Log
3482     Description: Software upgrade auto-apply abort rejected
3483     Entity_Instance_ID: orchestration=sw-upgrade
3484     Severity: critical
3485     Alarm_Type: equipment
3486     Probable_Cause: unspecified-reason
3487     Service_Affecting: False
3488
3489 900.220:
3490     Type: Log
3491     Description: Software upgrade auto-apply abort failed
3492     Entity_Instance_ID: orchestration=sw-upgrade
3493     Severity: critical
3494     Alarm_Type: equipment
3495     Probable_Cause: unspecified-reason
3496     Service_Affecting: False
3497
3498 900.221:
3499     Type: Log
3500     Description: Software upgrade auto-apply aborted
3501     Entity_Instance_ID: orchestration=sw-upgrade
3502     Severity: critical
3503     Alarm_Type: equipment
3504     Probable_Cause: unspecified-reason
3505     Service_Affecting: False
3506
3507 900.301:
3508     Type: Alarm
3509     Description: Firmware Update auto-apply inprogress
3510     Entity_Instance_ID: orchestration=fw-update
3511     Severity: major
3512     Proposed_Repair_Action: Wait for firmware update auto-apply to complete; if problem persists contact next level of support
3513     Maintenance_Action:
3514     Inhibit_Alarms:
3515     Alarm_Type: equipment
3516     Probable_Cause: unspecified-reason
3517     Service_Affecting: True
3518     Suppression: True
3519     Management_Affecting_Severity: warning
3520     Degrade_Affecting_Severity: none
3521
3522 900.302:
3523     Type: Alarm
3524     Description: Firmware Update auto-apply aborting
3525     Entity_Instance_ID: orchestration=fw-update
3526     Severity: major
3527     Proposed_Repair_Action: Wait for firmware update auto-apply abort to complete; if problem persists contact next level of support
3528     Maintenance_Action:
3529     Inhibit_Alarms:
3530     Alarm_Type: equipment
3531     Probable_Cause: unspecified-reason
3532     Service_Affecting: True
3533     Suppression: True
3534     Management_Affecting_Severity: warning
3535     Degrade_Affecting_Severity: none
3536
3537 900.303:
3538     Type: Alarm
3539     Description: Firmware Update auto-apply failed. Command "sw-manager fw-update-strategy apply" failed.
3540     Entity_Instance_ID: orchestration=fw-update
3541     Severity: critical
3542     Proposed_Repair_Action: Attempt to apply firmware update manually; if problem persists contact next level of support
3543     Maintenance_Action:
3544     Inhibit_Alarms:
3545     Alarm_Type: equipment
3546     Probable_Cause: underlying-resource-unavailable
3547     Service_Affecting: True
3548     Suppression: True
3549     Management_Affecting_Severity: warning
3550     Degrade_Affecting_Severity: none
3551
3552 900.311:
3553     Type: Log
3554     Description: Firmware update auto-apply start
3555     Entity_Instance_ID: orchestration=fw-update
3556     Severity: critical
3557     Alarm_Type: equipment
3558     Probable_Cause: unspecified-reason
3559     Service_Affecting: False
3560
3561 900.312:
3562     Type: Log
3563     Description: Firmware update auto-apply inprogress
3564     Entity_Instance_ID: orchestration=fw-update
3565     Severity: critical
3566     Alarm_Type: equipment
3567     Probable_Cause: unspecified-reason
3568     Service_Affecting: False
3569
3570 900.313:
3571     Type: Log
3572     Description: Firmware update auto-apply rejected
3573     Entity_Instance_ID: orchestration=fw-update
3574     Severity: critical
3575     Alarm_Type: equipment
3576     Probable_Cause: unspecified-reason
3577     Service_Affecting: False
3578
3579 900.314:
3580     Type: Log
3581     Description: Firmware update auto-apply cancelled
3582     Entity_Instance_ID: orchestration=fw-update
3583     Severity: critical
3584     Alarm_Type: equipment
3585     Probable_Cause: unspecified-reason
3586     Service_Affecting: False
3587
3588 900.315:
3589     Type: Log
3590     Description: Firmware update auto-apply failed
3591     Entity_Instance_ID: orchestration=fw-update
3592     Severity: critical
3593     Alarm_Type: equipment
3594     Probable_Cause: unspecified-reason
3595     Service_Affecting: False
3596
3597 900.316:
3598     Type: Log
3599     Description: Firmware update auto-apply completed
3600     Entity_Instance_ID: orchestration=fw-update
3601     Severity: critical
3602     Alarm_Type: equipment
3603     Probable_Cause: unspecified-reason
3604     Service_Affecting: False
3605
3606 900.317:
3607     Type: Log
3608     Description: Firmware update auto-apply abort
3609     Entity_Instance_ID: orchestration=fw-update
3610     Severity: critical
3611     Alarm_Type: equipment
3612     Probable_Cause: unspecified-reason
3613     Service_Affecting: False
3614
3615 900.318:
3616     Type: Log
3617     Description: Firmware update auto-apply aborting
3618     Entity_Instance_ID: orchestration=fw-update
3619     Severity: critical
3620     Alarm_Type: equipment
3621     Probable_Cause: unspecified-reason
3622     Service_Affecting: False
3623
3624 900.319:
3625     Type: Log
3626     Description: Firmware update auto-apply abort rejected
3627     Entity_Instance_ID: orchestration=fw-update
3628     Severity: critical
3629     Alarm_Type: equipment
3630     Probable_Cause: unspecified-reason
3631     Service_Affecting: False
3632
3633 900.320:
3634     Type: Log
3635     Description: Firmware update auto-apply abort failed
3636     Entity_Instance_ID: orchestration=fw-update
3637     Severity: critical
3638     Alarm_Type: equipment
3639     Probable_Cause: unspecified-reason
3640     Service_Affecting: False
3641
3642 900.321:
3643     Type: Log
3644     Description: Firmware update auto-apply aborted
3645     Entity_Instance_ID: orchestration=fw-update
3646     Severity: critical
3647     Alarm_Type: equipment
3648     Probable_Cause: unspecified-reason
3649     Service_Affecting: False
3650
3651 900.401:
3652     Type: Alarm
3653     Description: Kubernetes upgrade auto-apply inprogress
3654     Entity_Instance_ID: orchestration=kube-upgrade
3655     Severity: major
3656     Proposed_Repair_Action: Wait for kubernetes upgrade auto-apply to complete; if problem persists contact next level of support
3657     Maintenance_Action:
3658     Inhibit_Alarms:
3659     Alarm_Type: equipment
3660     Probable_Cause: unspecified-reason
3661     Service_Affecting: True
3662     Suppression: True
3663     Management_Affecting_Severity: warning
3664     Degrade_Affecting_Severity: none
3665
3666 900.402:
3667     Type: Alarm
3668     Description: Kubernetes upgrade auto-apply aborting
3669     Entity_Instance_ID: orchestration=kube-upgrade
3670     Severity: major
3671     Proposed_Repair_Action: Wait for kubernetes upgrade auto-apply abort to complete; if problem persists contact next level of support
3672     Maintenance_Action:
3673     Inhibit_Alarms:
3674     Alarm_Type: equipment
3675     Probable_Cause: unspecified-reason
3676     Service_Affecting: True
3677     Suppression: True
3678     Management_Affecting_Severity: warning
3679     Degrade_Affecting_Severity: none
3680
3681 900.403:
3682     Type: Alarm
3683     Description: Kubernetes upgrade auto-apply failed. Command "sw-manager kube-upgrade-strategy apply" failed.
3684     Entity_Instance_ID: orchestration=kube-upgrade
3685     Severity: critical
3686     Proposed_Repair_Action: Attempt to apply kubernetes upgrade manually; if problem persists contact next level of support
3687     Maintenance_Action:
3688     Inhibit_Alarms:
3689     Alarm_Type: equipment
3690     Probable_Cause: underlying-resource-unavailable
3691     Service_Affecting: True
3692     Suppression: True
3693     Management_Affecting_Severity: warning
3694     Degrade_Affecting_Severity: none
3695
3696 900.411:
3697     Type: Log
3698     Description: Kubernetes upgrade auto-apply start
3699     Entity_Instance_ID: orchestration=kube-upgrade
3700     Severity: critical
3701     Alarm_Type: equipment
3702     Probable_Cause: unspecified-reason
3703     Service_Affecting: False
3704
3705 900.412:
3706     Type: Log
3707     Description: Kubernetes upgrade auto-apply inprogress
3708     Entity_Instance_ID: orchestration=kube-upgrade
3709     Severity: critical
3710     Alarm_Type: equipment
3711     Probable_Cause: unspecified-reason
3712     Service_Affecting: False
3713
3714 900.413:
3715     Type: Log
3716     Description: Kubernetes upgrade auto-apply rejected
3717     Entity_Instance_ID: orchestration=kube-upgrade
3718     Severity: critical
3719     Alarm_Type: equipment
3720     Probable_Cause: unspecified-reason
3721     Service_Affecting: False
3722
3723 900.414:
3724     Type: Log
3725     Description: Kubernetes upgrade auto-apply cancelled
3726     Entity_Instance_ID: orchestration=kube-upgrade
3727     Severity: critical
3728     Alarm_Type: equipment
3729     Probable_Cause: unspecified-reason
3730     Service_Affecting: False
3731
3732 900.415:
3733     Type: Log
3734     Description: Kubernetes upgrade auto-apply failed
3735     Entity_Instance_ID: orchestration=kube-upgrade
3736     Severity: critical
3737     Alarm_Type: equipment
3738     Probable_Cause: unspecified-reason
3739     Service_Affecting: False
3740
3741 900.416:
3742     Type: Log
3743     Description: Kubernetes upgrade auto-apply completed
3744     Entity_Instance_ID: orchestration=kube-upgrade
3745     Severity: critical
3746     Alarm_Type: equipment
3747     Probable_Cause: unspecified-reason
3748     Service_Affecting: False
3749
3750 900.417:
3751     Type: Log
3752     Description: Kubernetes upgrade auto-apply abort
3753     Entity_Instance_ID: orchestration=kube-upgrade
3754     Severity: critical
3755     Alarm_Type: equipment
3756     Probable_Cause: unspecified-reason
3757     Service_Affecting: False
3758
3759 900.418:
3760     Type: Log
3761     Description: Kubernetes upgrade auto-apply aborting
3762     Entity_Instance_ID: orchestration=kube-upgrade
3763     Severity: critical
3764     Alarm_Type: equipment
3765     Probable_Cause: unspecified-reason
3766     Service_Affecting: False
3767
3768 900.419:
3769     Type: Log
3770     Description: Kubernetes upgrade auto-apply abort rejected
3771     Entity_Instance_ID: orchestration=kube-upgrade
3772     Severity: critical
3773     Alarm_Type: equipment
3774     Probable_Cause: unspecified-reason
3775     Service_Affecting: False
3776
3777 900.420:
3778     Type: Log
3779     Description: Kubernetes upgrade auto-apply abort failed
3780     Entity_Instance_ID: orchestration=kube-upgrade
3781     Severity: critical
3782     Alarm_Type: equipment
3783     Probable_Cause: unspecified-reason
3784     Service_Affecting: False
3785
3786 900.421:
3787     Type: Log
3788     Description: Kubernetes upgrade auto-apply aborted
3789     Entity_Instance_ID: orchestration=kube-upgrade
3790     Severity: critical
3791     Alarm_Type: equipment
3792     Probable_Cause: unspecified-reason
3793     Service_Affecting: False
3794
3795 900.501:
3796     Type: Alarm
3797     Description: Kubernetes rootca update auto-apply inprogress
3798     Entity_Instance_ID: orchestration=kube-rootca-update
3799     Severity: major
3800     Proposed_Repair_Action: Wait for kubernetes rootca update auto-apply to complete; if problem persists contact next level of support
3801     Maintenance_Action:
3802     Inhibit_Alarms:
3803     Alarm_Type: equipment
3804     Probable_Cause: unspecified-reason
3805     Service_Affecting: True
3806     Suppression: True
3807     Management_Affecting_Severity: warning
3808     Degrade_Affecting_Severity: none
3809
3810 900.502:
3811     Type: Alarm
3812     Description: Kubernetes rootca update auto-apply aborting
3813     Entity_Instance_ID: orchestration=kube-rootca-update
3814     Severity: major
3815     Proposed_Repair_Action: Wait for kubernetes rootca update auto-apply abort to complete; if problem persists contact next level of support
3816     Maintenance_Action:
3817     Inhibit_Alarms:
3818     Alarm_Type: equipment
3819     Probable_Cause: unspecified-reason
3820     Service_Affecting: True
3821     Suppression: True
3822     Management_Affecting_Severity: warning
3823     Degrade_Affecting_Severity: none
3824
3825 900.503:
3826     Type: Alarm
3827     Description: Kubernetes rootca update auto-apply failed. Command "sw-manager kube-rootca-update-strategy apply" failed.
3828     Entity_Instance_ID: orchestration=kube-rootca-update
3829     Severity: critical
3830     Proposed_Repair_Action: Attempt to apply kubernetes rootca update manually; if problem persists contact next level of support
3831     Maintenance_Action:
3832     Inhibit_Alarms:
3833     Alarm_Type: equipment
3834     Probable_Cause: underlying-resource-unavailable
3835     Service_Affecting: True
3836     Suppression: True
3837     Management_Affecting_Severity: warning
3838     Degrade_Affecting_Severity: none
3839
3840 900.511:
3841     Type: Log
3842     Description: Kubernetes rootca update auto-apply start
3843     Entity_Instance_ID: orchestration=kube-rootca-update
3844     Severity: critical
3845     Alarm_Type: equipment
3846     Probable_Cause: unspecified-reason
3847     Service_Affecting: False
3848
3849 900.512:
3850     Type: Log
3851     Description: Kubernetes rootca update auto-apply inprogress
3852     Entity_Instance_ID: orchestration=kube-rootca-update
3853     Severity: critical
3854     Alarm_Type: equipment
3855     Probable_Cause: unspecified-reason
3856     Service_Affecting: False
3857
3858 900.513:
3859     Type: Log
3860     Description: Kubernetes rootca update auto-apply rejected
3861     Entity_Instance_ID: orchestration=kube-rootca-update
3862     Severity: critical
3863     Alarm_Type: equipment
3864     Probable_Cause: unspecified-reason
3865     Service_Affecting: False
3866
3867 900.514:
3868     Type: Log
3869     Description: Kubernetes rootca update auto-apply cancelled
3870     Entity_Instance_ID: orchestration=kube-rootca-update
3871     Severity: critical
3872     Alarm_Type: equipment
3873     Probable_Cause: unspecified-reason
3874     Service_Affecting: False
3875
3876 900.515:
3877     Type: Log
3878     Description: Kubernetes rootca update auto-apply failed
3879     Entity_Instance_ID: orchestration=kube-rootca-update
3880     Severity: critical
3881     Alarm_Type: equipment
3882     Probable_Cause: unspecified-reason
3883     Service_Affecting: False
3884
3885 900.516:
3886     Type: Log
3887     Description: Kubernetes rootca update auto-apply completed
3888     Entity_Instance_ID: orchestration=kube-rootca-update
3889     Severity: critical
3890     Alarm_Type: equipment
3891     Probable_Cause: unspecified-reason
3892     Service_Affecting: False
3893
3894 900.517:
3895     Type: Log
3896     Description: Kubernetes rootca update auto-apply abort
3897     Entity_Instance_ID: orchestration=kube-rootca-update
3898     Severity: critical
3899     Alarm_Type: equipment
3900     Probable_Cause: unspecified-reason
3901     Service_Affecting: False
3902
3903 900.518:
3904     Type: Log
3905     Description: Kubernetes rootca update auto-apply aborting
3906     Entity_Instance_ID: orchestration=kube-rootca-update
3907     Severity: critical
3908     Alarm_Type: equipment
3909     Probable_Cause: unspecified-reason
3910     Service_Affecting: False
3911
3912 900.519:
3913     Type: Log
3914     Description: Kubernetes rootca update auto-apply abort rejected
3915     Entity_Instance_ID: orchestration=kube-rootca-update
3916     Severity: critical
3917     Alarm_Type: equipment
3918     Probable_Cause: unspecified-reason
3919     Service_Affecting: False
3920
3921 900.520:
3922     Type: Log
3923     Description: Kubernetes rootca update auto-apply abort failed
3924     Entity_Instance_ID: orchestration=kube-rootca-update
3925     Severity: critical
3926     Alarm_Type: equipment
3927     Probable_Cause: unspecified-reason
3928     Service_Affecting: False
3929
3930 900.521:
3931     Type: Log
3932     Description: Kubernetes rootca update auto-apply aborted
3933     Entity_Instance_ID: orchestration=kube-rootca-update
3934     Severity: critical
3935     Alarm_Type: equipment
3936     Probable_Cause: unspecified-reason
3937     Service_Affecting: False
3938 ...